Longevity Analysis

In this notebook, we try to analyse what makes a character succeed. We first calculate a metric to evaluate if they are famous or not. Then we do all kind of plot for the website: general analysis, creativity analysis, diversity, longevity...

In [1]:
# Import libraries
import pandas as pd
import numpy as np
%matplotlib inline
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import math
import re
import string
import pickle
from collections import Counter

import plotly
import plotly.graph_objects as go
import chart_studio
import chart_studio.tools as tls
import chart_studio.plotly as py
from IPython.display import HTML
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pyperclip
import os

Load the dataset

In [2]:
marvel_pers = pd.read_pickle("data_pickle/marvel_pers_final.txt")
dc_pers = pd.read_pickle("data_pickle/dc_pers_final.txt")
In [3]:
def clean_years(l):
    if l is None:
        new_l = []
    else:
        new_l = [year for year in l if year>1930]
        
    return new_l
    
dc_pers['years'] = dc_pers['years'].apply(clean_years)
marvel_pers['years'] = marvel_pers['years'].apply(clean_years)
dc_pers['years']
dc_pers['First_apparition'] = dc_pers['years'].apply(lambda x: min(x) if len(x)!=0 else None)    
marvel_pers['First_apparition'] = marvel_pers['years'].apply(lambda x: min(x) if len(x)!=0 else None)   

Find the longevity of each character, i.e. the number of year it appears

In [4]:
marvel_pers['Longevity'] = marvel_pers['years'].apply(lambda years: len(list(set(years))))
dc_pers['Longevity'] = dc_pers['years'].apply(lambda years: len(list(set(years))))

Let's remove characters that don't appear at all in comics

In [5]:
marvel_pers.drop(index = marvel_pers[marvel_pers['Longevity']==0].index,inplace=True)
dc_pers.drop(index = dc_pers[dc_pers['Longevity']==0].index,inplace=True)
In [6]:
marvel_pers.head(10)
Out[6]:
URL Real Name Identity Current Alias Citizenship Marital Status Occupation Education Gender Height ... Good_count Bad_count Neutral_count Dates Behavior Number_of_apparitions DatesString years First_apparition Longevity
1 /wiki/2-D_(Earth-616) Darell (full name unrevealed) Secret Identity 2-D American Single Adventurer Unknown Male Unknown ... 4 0 0 [January, 2008, July, 2011, September, 2011, J... Good 4 January, 2008,July, 2011,September, 2011,Janua... [2008, 2011, 2011, 2019] 2008.0 3
2 /wiki/Abraham_Erskine_(Earth-616) Abraham Erskine Known to Authorities Identity Dr. Joseph Reinstein German, American Married Scientist Advanced College Degree Male 5' 6" (1.68 m) ... 40 1 50 [March, 1941, November, 1946, March, 1965, Jan... Neutral 91 March, 1941,November, 1946,March, 1965,January... [1941, 1946, 1965, 1969, 1971, 1975, 1976, 197... 1941.0 35
3 /wiki/11-Ball_(Earth-616) Unknown Secret Identity 11-Ball American Single Professional criminal; former henchman Unknown Male Unknown ... 0 1 0 [July, 1991] Bad 1 July, 1991 [1991] 1991.0 1
4 /wiki/Abraham_(Earth-616) Abraham No Dual Identity Unknown Unknown Married Prophet Unknown Male Unknown ... 1 0 6 [December, 1953, February, 1954, December, 195... Neutral 7 December, 1953,February, 1954,December, 1953,N... [1953, 1954, 1953, 1988, 2011, 2017, 2020] 1953.0 6
5 /wiki/Abarac_(Earth-616) Abarac No Dual Identity Unknown Cybernian Single Court magician, advisor Unknown Male Unknown ... 1 0 0 [November, 1970] Good 1 November, 1970 [1970] 1970.0 1
6 /wiki/Abdul_Faoul_(Earth-616) Professor Abdul Faoul Secret Identity Scarlet Scarab Egyptian Single Archeologist, adventurer Unknown Male Unknown ... 0 4 3 [December, 1977, February, 1978, June, 2014, J... Bad 7 December, 1977,February, 1978,June, 2014,July,... [1977, 1978, 2014, 2014, 1981, 1982, 2013] 1977.0 6
7 /wiki/A.C._O%27Connor_(Earth-616) A. C. O'Connor No Dual Identity Ace O'Connor American Single Journalist Unknown Female Unknown ... 0 0 5 [September, 1980, October, 1980, December, 198... Neutral 5 September, 1980,October, 1980,December, 1980,F... [1980, 1980, 1980, 1981, 1981] 1980.0 2
8 /wiki/7-X9_(Earth-616) Unknown No Dual Identity 7-X9 Unknown Unknown Unknown Unknown Male Unknown ... 0 1 0 [March, 2016] Bad 1 March, 2016 [2016] 2016.0 1
9 /wiki/803_(Earth-616) 803 No Dual Identity Unknown Unknown Single Unknown Unknown Agender Unknown ... 14 0 1 [February, 2016, January, 2016, March, 2016, A... Good 15 February, 2016,January, 2016,March, 2016,April... [2016, 2016, 2016, 2016, 2016, 2016, 2016, 201... 2016.0 2
10 /wiki/Abra_and_Cadabra_(Earth-616) Unknown Secret Identity Abra and Cadabra American Single Magicians Unknown Male Unknown ... 0 1 0 [October, 1947] Bad 1 October, 1947 [1947] 1947.0 1

10 rows × 28 columns

Discover the most famous character that are good and bad

In [7]:
#20 most appearance at MARVEL
appear_20 = marvel_pers.sort_values('Number_of_apparitions',ascending=False).head(10)[['Real Name','Current Alias']]
appear_20
Out[7]:
Real Name Current Alias
20183 Peter Benjamin Parker Spider-Man
24326 Steven "Steve" Rogers Captain America
1290 Anthony Edward "Tony" Stark Iron Man
11709 James Howlett Wolverine
25251 Thor Odinson All-Father Thor
3444 Robert Bruce Banner Hulk
21335 Reed Richards Mister Fantastic
2576 Benjamin Jacob "Ben" Grimm The Thing
22813 Scott Summers Cyclops
10461 Dr. Henry "Hank" Philip McCoy Beast
In [8]:
#20 most longevity at MARVEL
long_20 = marvel_pers.sort_values('Longevity',ascending=False).head(10)[['Real Name','Current Alias']]
long_20
Out[8]:
Real Name Current Alias
18466 Namor McKenzie Sub-Mariner
10948 The Human Torch Human Torch
24326 Steven "Steve" Rogers Captain America
19762 Patricia "Patsy" Walker Hellcat
319 Adolf Hitler Unknown
11781 James Buchanan Barnes Winter Soldier
15373 Loki Laufeyson Unknown
12750 Johann Shmidt Red Skull
13168 Jonathan Lowell Spencer "Johnny" Storm Human Torch
25251 Thor Odinson All-Father Thor
In [9]:
#both longevity and nb appearance
pd.merge(appear_20,long_20)
Out[9]:
Real Name Current Alias
0 Steven "Steve" Rogers Captain America
1 Thor Odinson All-Father Thor

We can see that only 3 characters are in the top 20 in number of appearance and in longevity. Maybe we need to define the "famousity" of a character. The famousity will be defined as the harmonic mean between the appearance and the longevity.

In [10]:
marvel_pers['Longevity'].mean()
Out[10]:
3.65801123470109
In [11]:
dc_pers['Longevity'].mean()
Out[11]:
3.411501063506533

To determine the function, we need to see the ditribution of the Longevity and NB of appearance.

In [12]:
fig, axs = plt.subplots(1,2, figsize=(12,8))
#h, bins,_ = plt.hist(marvel_pers['Number_of_apparitions'],bins=10)
nb_bins = 100
logbins = np.logspace(np.log10(1),np.log10(max(marvel_pers['Number_of_apparitions'])),nb_bins)
marvel_app = marvel_pers['Number_of_apparitions'].value_counts(normalize=True)
dc_app = dc_pers['Number_of_apparitions'].value_counts(normalize=True)
axs[0].scatter(marvel_app.index, marvel_app.values, facecolors='none', edgecolors = 'red', alpha=0.95)
axs[0].scatter(dc_app.index, dc_app.values, facecolors='none', edgecolors = 'blue', alpha=0.95)
#axs[0].hist(marvel_pers['Number_of_apparitions'], color = 'red', bins=logbins, alpha=0.7)
#axs[0].hist(dc_pers['Number_of_apparitions'], color = 'blue', bins=logbins,alpha=0.7)
axs[0].set_title('NB of Appearance distribution at Marvel')
axs[0].set_xlabel('Nb of Appearance')
axs[0].legend(['Marvel','DC Comic'])
axs[0].set_xlim((1,10000))
axs[0].set_ylim((2e-5,1))
axs[0].set_ylabel('Count')
axs[0].set_xscale('log')
axs[0].set_yscale('log')
median_marvel = marvel_pers['Number_of_apparitions'].mean()
median_dc = dc_pers['Number_of_apparitions'].mean()
textstr = '\n'.join((\
    'Nb appearance: \n'\
    r'$\mathrm{Marvel\ Mean}=%.1f$' % (median_marvel, ),\
    r'$\mathrm{DC\ Mean}=%.1f$' % (median_dc, )))       
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords
axs[0].text(0.5, 0.88, textstr, transform=axs[0].transAxes, fontsize=12,\
        verticalalignment='top', bbox=props)

nb_bins = 100
logbins = np.logspace(np.log10(1),np.log10(max(marvel_pers['Longevity'])),nb_bins)
marvel_long = marvel_pers['Longevity'].value_counts(normalize=True)
dc_long = dc_pers['Longevity'].value_counts(normalize=True)
axs[1].scatter(marvel_long.index, marvel_long.values, facecolors='none', edgecolors = 'red', alpha=0.95)
axs[1].scatter(dc_long.index, dc_long.values, facecolors='none', edgecolors = 'blue', alpha=0.95)
#axs[1].hist(marvel_pers['Longevity'], color = 'red',bins=logbins, alpha=0.7)
#axs[1].hist(dc_pers['Longevity'], color = 'blue', bins=logbins, alpha=0.7)
axs[1].set_title('Longevity distribution at Marvel')
axs[1].set_xlabel('Longevity')
axs[1].set_ylabel('Count')
axs[1].legend(['Marvel','DC Comic'])
axs[1].set_xscale('log')
axs[1].set_yscale('log')
axs[1].set_xlim((1,90))
axs[1].set_ylim((2e-5,1))
median_marvel = marvel_pers['Longevity'].mean()
median_dc = dc_pers['Longevity'].mean()
textstr1 = '\n'.join((\
    'Longevity: \n'\
    r'$\mathrm{Marvel\ Mean}=%2.1f$' % (median_marvel, ),\
    r'$\mathrm{DC\ Mean}=%2.1f$' % (median_dc, )))       
# these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords
axs[1].text(0.5, 0.88, textstr1, transform=axs[1].transAxes, fontsize=12,\
        verticalalignment='top', bbox=props)

fig.show()
plt.savefig('img/longevity/histogram_appareace_longevity.png')
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/ipykernel_launcher.py:59: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

We see that the distribution really follows power laws. So for famousity, we could take the mean of the normalized logarithm, i.e: $$\text{Famous} = \frac{\frac{\log(\text{longevity})}{\max(\log(\text{longevity}))}+\frac{\log(\text{Appearance})}{\max(\log(\text{Appearance}))}}{2}$$

In [13]:
marvel_pers['Famous']=(np.log(marvel_pers['Number_of_apparitions'])/max(np.log(marvel_pers['Number_of_apparitions']))+np.log(marvel_pers['Longevity'])/max(np.log(marvel_pers['Longevity'])))/2
dc_pers['Famous']=(np.log(dc_pers['Number_of_apparitions'])/max(np.log(dc_pers['Number_of_apparitions']))+np.log(dc_pers['Longevity'])/max(np.log(dc_pers['Longevity'])))/2
In [14]:
#Top 10 famous character at Marvel
marvel_pers.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[14]:
Real Name Current Alias
24326 Steven "Steve" Rogers Captain America
20183 Peter Benjamin Parker Spider-Man
1290 Anthony Edward "Tony" Stark Iron Man
25251 Thor Odinson All-Father Thor
21335 Reed Richards Mister Fantastic
2576 Benjamin Jacob "Ben" Grimm The Thing
3444 Robert Bruce Banner Hulk
18466 Namor McKenzie Sub-Mariner
13168 Jonathan Lowell Spencer "Johnny" Storm Human Torch
22813 Scott Summers Cyclops
In [15]:
#Top 10 famous character at DC
dc_pers.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[15]:
Real Name Current Alias
5892 Kal-El (birth name);Clark Kent (legal name) Superman
1507 Bruce Wayne Batman
4193 Harold "Hal" Jordan Green Lantern
4665 J'onn J'onzz Martian Manhunter
8349 Orin Aquaman
1198 Bartholomew Henry "Barry" Allen The Flash
329 Alan Ladd Wellington Scott Green Lantern
5082 Jason Peter "Jay" Garrick The Flash
11244 Wallace Rudolph "Wally" West The Flash
1664 Bruce Wayne Batman

It looks to work, since we find the top of the two list + the ones that are in both list. Let's check, if this techniques for both good and bad character, and see if we recognize them

In [16]:
#10 most famous GOOD character at MARVEL
marvel_pers[marvel_pers['Behavior']=='Good'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[16]:
Real Name Current Alias
24326 Steven "Steve" Rogers Captain America
20183 Peter Benjamin Parker Spider-Man
1290 Anthony Edward "Tony" Stark Iron Man
25251 Thor Odinson All-Father Thor
21335 Reed Richards Mister Fantastic
2576 Benjamin Jacob "Ben" Grimm The Thing
3444 Robert Bruce Banner Hulk
18466 Namor McKenzie Sub-Mariner
13168 Jonathan Lowell Spencer "Johnny" Storm Human Torch
22813 Scott Summers Cyclops
In [17]:
#10 most famous bad character at MARVEL
marvel_pers[marvel_pers['Behavior']=='Bad'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[17]:
Real Name Current Alias
15373 Loki Laufeyson Unknown
27216 Wilson Grant Fisk Kingpin
12750 Johann Shmidt Red Skull
27138 William Baker Sandman
17383 Mephisto Unknown
210 Adrian Toomes Vulture
23079 Sergei Nikolaevich Kravinoff (originally Serge... Kraven the Hunter
3614 Cain Marko Juggernaut
26666 Vlad Dracula Dracula
874 Amora Enchantress
In [18]:
#10 most famous good character at DC COMICS
dc_pers[dc_pers['Behavior']=='Good'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[18]:
Real Name Current Alias
5892 Kal-El (birth name);Clark Kent (legal name) Superman
1507 Bruce Wayne Batman
4193 Harold "Hal" Jordan Green Lantern
4665 J'onn J'onzz Martian Manhunter
8349 Orin Aquaman
1198 Bartholomew Henry "Barry" Allen The Flash
329 Alan Ladd Wellington Scott Green Lantern
5082 Jason Peter "Jay" Garrick The Flash
11244 Wallace Rudolph "Wally" West The Flash
1664 Bruce Wayne Batman
In [19]:
#10 most famous good character at DC COMICS
dc_pers[dc_pers['Behavior']=='Bad'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[19]:
Real Name Current Alias
11996 Alexander Joseph Luthor Lex Luthor
21533 Uxas Darkseid
12161 Alexis Luthor Lex Luthor
16602 Unknown The Joker
15259 George "Digger" Harkness Captain Boomerang
15635 Harvey Dent Two-Face
11874 Adolf Hitler Adolf Hitler
21182 Thaal Sinestro Sinestro
15557 Grodd Gorilla Grodd
21467 Vandar Adg II Vandal Savage

Calculate the score in longevity, apparition and famousity out of 100

In [20]:
marvel_pers['Score appearance'] = np.log(marvel_pers['Number_of_apparitions'])/max(np.log(marvel_pers['Number_of_apparitions']))*100
marvel_pers['Score longevity'] = np.log(marvel_pers['Longevity'])/max(np.log(marvel_pers['Longevity']))*100
marvel_pers['Score Famous'] = 100*marvel_pers['Famous']
dc_pers['Score appearance'] = np.log(dc_pers['Number_of_apparitions'])/max(np.log(dc_pers['Number_of_apparitions']))*100
dc_pers['Score longevity'] = np.log(dc_pers['Longevity'])/max(np.log(dc_pers['Longevity']))*100
dc_pers['Score Famous'] = 100*dc_pers['Famous']

Appearance over the years

In [21]:
dc_pers
Out[21]:
URL Real Name Identity Current Alias Citizenship Good or Bad Marital Status Occupation Education Gender ... Behavior Number_of_apparitions DatesString years First_apparition Longevity Famous Score appearance Score longevity Score Famous
0 wiki/Adam_Blake_(The_Nail) Adam Blake Secret Identity Captain Comet Unknown Good Unknown Unknown Unknown Male ... Neutral 1 September, 2004 [2004] 2004.0 1 0.000000 0.000000 0.000000 0.000000
2 wiki/Adellca_(New_Earth) Adellca Secret Identity Green Lantern Unknown Good Single Green Lantern Unknown Female ... Good 1 September, 2011 [2011] 2011.0 1 0.000000 0.000000 0.000000 0.000000
3 wiki/A-1_(Prime_Earth) Artificial Intelligence Data Flow Unknown A-I Unknown Good Single Unknown Unknown Unknown ... Good 1 December, 2013 [2013] 2013.0 1 0.000000 0.000000 0.000000 0.000000
4 wiki/Ace_Egan_(Quality_Universe) Ace Egan Secret Identity Ace of Space Unknown Good Unknown Unknown Unknown Male ... Good 2 February, 1941,November, 1940 [1941, 1940] 1940.0 2 0.126297 8.654570 16.604765 12.629667
5 wiki/Abigail_Cable_(The_Nail) Abigail Cable Unknown Unknown Unknown Good Unknown Unknown Unknown Female ... Neutral 1 October, 1998 [1998] 1998.0 1 0.000000 0.000000 0.000000 0.000000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
22188 wiki/Zyklon_(New_Earth) Unknown Unknown Der Zyklon German Bad Unknown Nazi Agent Unknown Male ... Good 2 April, 1987,June, 1985 [1987, 1985] 1985.0 2 0.126297 8.654570 16.604765 12.629667
22190 wiki/Zymyr_(Pre-Zero_Hour) Zymyr Public Identity Zymyr United Planets Citizen Bad Single Scientist Unknown Male ... Bad 13 September, 1985,August, 1985,December, 1987,Ma... [1985, 1985, 1987, 1986, 1988, 1985, 1985, 198... 1984.0 5 0.352904 32.025715 38.555069 35.290392
22191 wiki/Z%C3%BCM_(New_Earth) Unknown Secret Identity ZüM Unknown Bad Single Super-Villain Unknown Male ... Bad 1 June, 2016 [2016] 2016.0 1 0.000000 0.000000 0.000000 0.000000
22192 wiki/Zyn_(New_Earth) Zyn Public Identity Unknown Unknown Bad Unknown Mercenary Unknown Male ... Bad 4 January, 1992,February, 1992,February, 1993,Ma... [1992, 1992, 1993, 1993] 1992.0 2 0.169570 17.309140 16.604765 16.956952
22193 wiki/Zwerg_(New_Earth) Zwerg (first name unknown) Public Identity Major Zwerg German Bad Unknown Unknown Unknown Male ... Bad 3 January, 1978,June, 1985,February, 1988 [1978, 1985, 1988] 1978.0 3 0.200175 13.717169 26.317929 20.017549

13164 rows × 33 columns

In [22]:
def plot_longevity(category, name):
    n = 0
    k = 0
    is_ = np.empty(2)
    activity = pd.Series([])
    try:
        if len(marvel_pers[marvel_pers[category]==name])==1:
            print(3)
            activity[k] = pd.Series(marvel_pers[marvel_pers[category]==name]['years'].values[0]).value_counts()
            print(2)
            appearance = marvel_pers[marvel_pers[category]==name]['Score appearance'].values[0]
            print(1)
            longevity = marvel_pers[marvel_pers[category]==name]['Score longevity'].values[0]
            famous = marvel_pers[marvel_pers[category]==name]['Score Famous'].values[0]    
        else:
            activity[k] = pd.Series(marvel_pers[marvel_pers[category]==name].sort_values(by='Score Famous',ascending=False)['years'].values[0]).value_counts()
            appearance = marvel_pers[marvel_pers[category]==name].sort_values(by='Score Famous',ascending=False)['Score appearance'].values[0]
            longevity = marvel_pers[marvel_pers[category]==name].sort_values(by='Score Famous',ascending=False)['Score longevity'].values[0]
            famous = marvel_pers[marvel_pers[category]==name].sort_values(by='Score Famous',ascending=False)['Score Famous'].values[0]   
        is_[n] = 1
        k += 1
    except:
        is_[n] = 0
    n+=1
    try:
        if len(dc_pers[dc_pers[category]==name])==1:
            activity[k] = pd.Series(dc_pers[dc_pers[category]==name]['years'].values[0]).value_counts()
            appearance = dc_pers[dc_pers[category]==name]['Score appearance'].values[0]
            longevity = dc_pers[dc_pers[category]==name]['Score longevity'].values[0]
            famous = dc_pers[dc_pers[category]==name]['Score Famous'].values[0]  
        else:
            activity[k] = pd.Series(dc_pers[dc_pers[category]==name].sort_values(by='Score Famous',ascending=False)['years'].values[0]).value_counts()
            appearance = dc_pers[dc_pers[category]==name].sort_values(by='Score Famous',ascending=False)['Score appearance'].values[0]
            longevity = dc_pers[dc_pers[category]==name].sort_values(by='Score Famous',ascending=False)['Score longevity'].values[0]
            famous = dc_pers[dc_pers[category]==name].sort_values(by='Score Famous',ascending=False)['Score Famous'].values[0]   
        is_[n] = 1
        k += 1
    except:
        is_[n] = 0
    n+=1
    fig, axs = plt.subplots(1,1,figsize=(14,k*6))
    l = 0
    for j in range(2):
        if is_[j]:
            if j==0:
                axs.bar(x = activity[l].index, height = activity[l], width = 1, color='red')
            else:
                axs.bar(x = activity[l].index, height = activity[l], width = 1, color='blue')
            axs.set_xlabel('Year')
            axs.set_ylabel('NB of appearance')
            axs.set_xlim(1930,2020)
            if j == 0:
                axs.set_title('"'+name + '" appearance in Marvel Comics')
            else:
                axs.set_title('"'+name + '" appearance in DC Comics')
            l+=1
    textstr = '\n'.join((\
    'Score: \n'\
    r'$\mathrm{Appearance}=%.1f$' % (appearance, ),\
    r'$\mathrm{Longevity}=%.1f$' % (longevity, ),\
    r'$\mathrm{Famous}=%.1f$' % (famous, )))        
    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    # place a text box in upper left in axes coords
    axs.text(0.05, 0.95, textstr, transform=axs.transAxes, fontsize=14,\
        verticalalignment='top', bbox=props)
    fig.show()
    plt.savefig('img/longevity/pers_'+name+'.png')

plot_longevity('Current Alias','Batman')
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/ipykernel_launcher.py:67: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

In [23]:
dc_pers[dc_pers['Current Alias']=='Batman'].sort_values(by='Score Famous',ascending=False)
Out[23]:
URL Real Name Identity Current Alias Citizenship Good or Bad Marital Status Occupation Education Gender ... Behavior Number_of_apparitions DatesString years First_apparition Longevity Famous Score appearance Score longevity Score Famous
1507 wiki/Bruce_Wayne_(Earth-One) Bruce Wayne Secret Identity Batman American Good Single Businessman Unknown Male ... Good 1326 December, 1964,December, 1964,December, 1964,M... [1964, 1964, 1964, 1965, 1965, 1965, 1965, 196... 1952.0 54 0.926656 89.772689 95.558552 92.665620
1664 wiki/Bruce_Wayne_(New_Earth) Bruce Wayne Secret Identity Batman American Good Single Businessman, Vigilante and Adventurer. Unknown Male ... Good 1482 January, 2008,May, 2005,December, 2001,June, 2... [2008, 2005, 2001, 2000, 1999, 2002, 1999, 199... 1965.0 37 0.888316 91.161441 86.501747 88.831594
1465 wiki/Bruce_Wayne_(Earth-Two) Bruce Wayne Secret Identity Batman American Good Widowed Detective · Police Commissioner Unknown Male ... Good 374 1953,1953,December, 1964,December, 1964,Decemb... [1953, 1953, 1964, 1964, 1964, 1964, 1965, 198... 1940.0 54 0.847641 73.969698 95.558552 84.764125
1502 wiki/Bruce_Wayne_(DCAU) Bruce Thomas Wayne Secret Identity Batman American Good Widowed Businessman Unknown Male ... Good 94 April, 1997,September, 1998,August, 1998,2000,... [1997, 1998, 1998, 2000, 2004, 2004, 2004, 200... 1993.0 15 0.608001 56.727149 64.872999 60.800074
10508 wiki/Terrence_McGinnis_(DCAU) Terrence "Terry" McGinnis Secret Identity Batman American Good Engaged Unknown Unknown Male ... Good 41 2000,2000,March, 1999,May, 1999,October, 2000,... [2000, 2000, 1999, 1999, 2000, 2000, 2000, 200... 1999.0 10 0.507636 46.367310 55.159834 50.763572
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1551 wiki/Bruce_Wayne_(Earth-172) Bruce Wayne Secret Identity Batman American Good Single Crime Fighter Unknown Male ... Good 1 December, 1967 [1967] 1967.0 1 0.000000 0.000000 0.000000 0.000000
2557 wiki/Damian_Wayne_(Batman_in_Bethlehem) Damian Wayne Secret Identity Batman American Good Single Unknown Unknown Male ... Neutral 1 March, 2011 [2011] 2011.0 1 0.000000 0.000000 0.000000 0.000000
3161 wiki/Eliot_Ness_(Scar_of_the_Bat) Eliot Ness Unknown Batman American Good Unknown Government Agent Unknown Male ... Good 1 1996 [1996] 1996.0 1 0.000000 0.000000 0.000000 0.000000
4878 wiki/James_Gordon_II_(Digital_Justice) James Gordon II Secret Identity Batman American Good Single Detective Sergeant Unknown Male ... Good 1 February, 1990 [1990] 1990.0 1 0.000000 0.000000 0.000000 0.000000
268 wiki/Alexander_Luthor_(Earth_32) Alexander Luthor Secret Identity Batman American Good Single Vigilante Unknown Male ... Good 1 November, 2019 [2019] 2019.0 1 0.000000 0.000000 0.000000 0.000000

105 rows × 33 columns

Apparently, there are many Batman, that's why the longevity score of batman is so bad. Should we merge them???

NOTE: It is important to know that a character can appear many times in the dataset, if, for example, the character appeared in other version of the earth, or appeared with different characteristic. Hence, for this part, we will combine all character under the same alias.

In [24]:
def merge_alias(df):
    new_df = df.sort_values(by='Score Famous', ascending=False).groupby('Current Alias').first()
    new_df['years'] = df.sort_values(by='Score Famous', ascending=False).groupby('Current Alias')['years'].sum()
    new_df['Number_of_apparitions'] = df.sort_values(by='Score Famous', ascending=False).groupby('Current Alias')['Number_of_apparitions'].sum()
    new_df['First_apparition'] = df.sort_values(by='Score Famous', ascending=False).groupby('Current Alias')['First_apparition'].min()
    new_df.drop('Unknown',inplace=True)
    new_df = new_df.reset_index()
    new_df['Longevity'] = new_df['years'].apply(lambda year: len(list(set(year))))
    new_df['Famous']=(np.log(new_df['Number_of_apparitions'])/max(np.log(new_df['Number_of_apparitions']))+np.log(new_df['Longevity'])/max(np.log(new_df['Longevity'])))/2
    new_df['Score appearance'] = np.log(new_df['Number_of_apparitions'])/max(np.log(new_df['Number_of_apparitions']))*100
    new_df['Score longevity'] = np.log(new_df['Longevity'])/max(np.log(new_df['Longevity']))*100
    new_df['Score Famous'] = 100*new_df['Famous']
    return new_df

marvel_alias = merge_alias(marvel_pers)
dc_alias = merge_alias(dc_pers)
In [25]:
def plot_longevity(category, name):
    n = 0
    k = 0
    is_ = np.empty(2)
    activity = pd.Series([])
    try:
        if len(marvel_alias[marvel_alias[category]==name]['years'])==1:
            print(3)
            activity[k] = pd.Series(marvel_alias[marvel_alias[category]==name]['years'].values[0]).value_counts()
            print(2)
            appearance = marvel_alias[marvel_alias[category]==name]['Score appearance'].values[0]
            print(1)
            longevity = marvel_alias[marvel_alias[category]==name]['Score longevity'].values[0]
            famous = marvel_alias[marvel_alias[category]==name]['Score Famous'].values[0]    
        else:
            activity[k] = pd.Series(marvel_alias[marvel_alias[category]==name].sort_values(by='Score Famous',ascending=False)['years'].values[0]).value_counts()
            appearance = marvel_alias[marvel_alias[category]==name].sort_values(by='Score Famous',ascending=False)['Score appearance'].values[0]
            longevity = marvel_alias[marvel_alias[category]==name].sort_values(by='Score Famous',ascending=False)['Score longevity'].values[0]
            famous = marvel_alias[marvel_alias[category]==name].sort_values(by='Score Famous',ascending=False)['Score Famous'].values[0]   
        is_[n] = 1
        k += 1
    except:
        is_[n] = 0
    n+=1
    try:
        if len(dc_alias[dc_alias[category]==name]['years'])==1:
            activity[k] = pd.Series(dc_alias[dc_alias[category]==name]['years'].values[0]).value_counts()
            appearance = dc_alias[dc_alias[category]==name]['Score appearance'].values[0]
            longevity = dc_alias[dc_alias[category]==name]['Score longevity'].values[0]
            famous = dc_alias[dc_alias[category]==name]['Score Famous'].values[0]  
        else:
            activity[k] = pd.Series(dc_alias[dc_alias[category]==name].sort_values(by='Score Famous',ascending=False)['years'].values[0]).value_counts()
            appearance = dc_alias[dc_alias[category]==name].sort_values(by='Score Famous',ascending=False)['Score appearance'].values[0]
            longevity = dc_alias[dc_alias[category]==name].sort_values(by='Score Famous',ascending=False)['Score longevity'].values[0]
            famous = dc_alias[dc_alias[category]==name].sort_values(by='Score Famous',ascending=False)['Score Famous'].values[0]   
        is_[n] = 1
        k += 1
    except:
        is_[n] = 0
    n+=1
    fig, axs = plt.subplots(1,1,figsize=(14,k*6))
    l = 0
    for j in range(2):
        if is_[j]:
            if j==0:
                axs.bar(x = activity[l].index, height = activity[l], width = 1, color='red')
            else:
                axs.bar(x = activity[l].index, height = activity[l], width = 1, color='blue')
            axs.set_xlabel('Year')
            axs.set_ylabel('NB of appearance')
            axs.set_xlim(1930,2020)
            if j == 0:
                axs.set_title('"'+name + '" appearance in Marvel Comics')
            else:
                axs.set_title('"'+name + '" appearance in DC Comics')
            l+=1
    textstr = '\n'.join((\
    'Score: \n'\
    r'$\mathrm{Appearance}=%.1f$' % (appearance, ),\
    r'$\mathrm{Longevity}=%.1f$' % (longevity, ),\
    r'$\mathrm{Famous}=%.1f$' % (famous, )))        
    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    # place a text box in upper left in axes coords
    axs.text(0.05, 0.95, textstr, transform=axs.transAxes, fontsize=14,\
        verticalalignment='top', bbox=props)
    fig.show()
    plt.savefig('img/longevity/alias_'+name+'.png')

plot_longevity('Current Alias','Batman')
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/ipykernel_launcher.py:67: UserWarning:

Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.

In [26]:
dc_alias[dc_alias['Current Alias']=='Batman'].sort_values(by='Score Famous',ascending=False)
Out[26]:
Current Alias URL Real Name Identity Citizenship Good or Bad Marital Status Occupation Education Gender ... Behavior Number_of_apparitions DatesString years First_apparition Longevity Famous Score appearance Score longevity Score Famous
474 Batman wiki/Bruce_Wayne_(Earth-One) Bruce Wayne Secret Identity American Good Single Businessman Unknown Male ... Good 3952 December, 1964,December, 1964,December, 1964,M... [1964, 1964, 1964, 1965, 1965, 1965, 1965, 196... 1940.0 81 0.976241 95.800127 99.448012 97.62407

1 rows × 33 columns

The problem seems resolved. Let's check the most famous if they are the same

In [27]:
marvel_alias.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[27]:
Real Name Current Alias
1390 Steven "Steve" Rogers Captain America
8211 Peter Benjamin Parker Spider-Man
4055 Jonathan Lowell Spencer "Johnny" Storm Human Torch
4222 Anthony Edward "Tony" Stark Iron Man
281 Thor Odinson All-Father Thor
5854 Reed Richards Mister Fantastic
4044 Robert Bruce Banner Hulk
8830 Benjamin Jacob "Ben" Grimm The Thing
8417 Namor McKenzie Sub-Mariner
2034 Scott Summers Cyclops
In [28]:
# check most famous
dc_alias.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Out[28]:
Real Name Current Alias
5638 Kal-El (birth name);Clark Kent (legal name) Superman
474 Bruce Wayne Batman
2472 Harold "Hal" Jordan Green Lantern
5879 Bartholomew Henry "Barry" Allen The Flash
6549 Diana of Themyscira Wonder Woman
3476 Lois Lane Lois Lane
4262 Richard "Dick" Grayson Nightwing
1294 James W. Gordon Commissioner Gordon
2465 Oliver Jonas "Ollie" Queen Green Arrow
2972 James Bartholomew Olsen Jimmy Olsen

We see that the order is different, but the people are roughly the same

Just check the distribution of the score

In [29]:
marvel_alias[marvel_alias['Score Famous']>0]['Score Famous'].hist(bins=100)
Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x117247ad0>

We are going to merge the two dataframe and analyse them together in order to determine the factor of famousity of a character. For this, we will keep all the occurence for each alias, so we can determine which part of them, which attributes as more chance to stay longer

In [30]:
#add a tag to know where they are from
marvel_pers['Comic'] = 'Marvel'
dc_pers['Comic'] = 'DC'
In [31]:
marvel_pers.columns
Out[31]:
Index(['URL', 'Real Name', 'Identity', 'Current Alias', 'Citizenship',
       'Marital Status', 'Occupation', 'Education', 'Gender', 'Height',
       'Weight', 'Eyes', 'Hair', 'Place of Birth', 'Height in string',
       'Height in float', 'Weight in string', 'Weight in float', 'Good_count',
       'Bad_count', 'Neutral_count', 'Dates', 'Behavior',
       'Number_of_apparitions', 'DatesString', 'years', 'First_apparition',
       'Longevity', 'Famous', 'Score appearance', 'Score longevity',
       'Score Famous', 'Comic'],
      dtype='object')
In [32]:
dc_pers.columns
Out[32]:
Index(['URL', 'Real Name', 'Identity', 'Current Alias', 'Citizenship',
       'Good or Bad', 'Marital Status', 'Occupation', 'Education', 'Gender',
       'Height', 'Weight', 'Eyes', 'Hair', 'Place of Birth',
       'Height in string', 'Height in float', 'Weight in string',
       'Weight in float', 'Good_count', 'Bad_count', 'Neutral_count', 'Dates',
       'Behavior', 'Number_of_apparitions', 'DatesString', 'years',
       'First_apparition', 'Longevity', 'Famous', 'Score appearance',
       'Score longevity', 'Score Famous', 'Comic'],
      dtype='object')
In [33]:
attribute = ['URL', 'Real Name', 'Current Alias', 'Comic', 'Identity', 'Citizenship', 'Marital Status',\
            'Occupation', 'Education', 'Gender', 'Height in float', 'Weight in float', 'Eyes', 'Hair',\
            'Place of Birth','Behavior',\
            'Number_of_apparitions', 'years', 'First_apparition', 'Longevity', 'Score appearance',\
            'Score longevity', 'Score Famous']
In [34]:
pers = pd.concat([marvel_pers[attribute],dc_pers[attribute]],axis=0)
In [35]:
pers.rename(inplace=True, columns={'Height in float':'Height', 'Weight in float':'Weight',\
                                   'Number_of_apparitions':'Nb appearance','years':'Years',\
                                   'First_apparition':'First appearance', })

we will seperate the dataset in 3 groups and analyse there attributes. The split is done as followed

In [36]:
pers_score = pers['Score Famous'].value_counts()
pers_high = len(pers[pers['Score Famous']>60])
pers_med = len(pers[(pers['Score Famous']<=60)&(pers['Score Famous']>20)])
pers_low = len(pers[pers['Score Famous']<=20])
total = len(pers['Score Famous'])

fig, axs = plt.subplots(1,1,figsize=(14,6))
axs.bar(pers_score.index, pers_score.values, color='purple')
axs.axvspan(60, 100, alpha=0.2, color='green')
axs.axvspan(20.1, 60, alpha=0.2, color='orange')
axs.axvspan(0, 19.9, alpha=0.2, color='brown')
axs.set_xlabel('Score Famous')
axs.set_ylabel('Number of character')
axs.set_title('Repartition of the character in the group of celebrity')
axs.set_yscale('log')
s = "Low Famousness\n , N = %d\n(%.2f%%)" % (pers_low,pers_low/total*100 )
axs.text(10,1e4,s,ha='center', va='top')
s = "Medium Famousness\n , N = %d\n(%.2f%%)" % (pers_med,pers_med/total*100)
axs.text(40,1e4,s,ha='center', va='top')
s = "High Famousness\n , N = %d\n(%.2f%%)" % (pers_high,pers_high/total*100)
axs.text(80,1e4,s,ha='center', va='top');
plt.savefig('img/longevity/repartition.png')

What does a famous character look like, and what is unfamous?

(For this part, I take the job of Pilou and modify it)

In [37]:
pers_high=pers[pers['Score Famous']>60]
pers_med = pers[(pers['Score Famous']<=60)&(pers['Score Famous']>20)]
pers_low = pers[pers['Score Famous']<=20]
In [38]:
#We normalize the count with the number of characters in every category in order to have a fair comparison
marital_status_high = pd.DataFrame(pers_high["Marital Status"].drop(index=pers_high[pers_high["Marital Status"]=='Unknown'].index).value_counts(normalize=True))
marital_status_high.columns = ['high_count']
marital_status_low = pd.DataFrame(pers_low["Marital Status"].drop(index=pers_low[pers_low["Marital Status"]=='Unknown'].index).value_counts(normalize=True))
marital_status_low.columns = ['low_count']
marital_status_med = pd.DataFrame(pers_med["Marital Status"].drop(index=pers_med[pers_med["Marital Status"]=='Unknown'].index).value_counts(normalize=True))
marital_status_med.columns = ['med_count']
dfList = [marital_status_high, marital_status_low, marital_status_med]
#dfs = [df.set_index(marital_status_high.index) for df in dfList]
marital_status = pd.concat(dfList, axis=1, join='outer', sort=True)\
                   .fillna(0)
marital_status = marital_status.sort_values(by='high_count', ascending=False)

fig, axs = plt.subplots(1, 1, figsize=(14,8))
X = np.arange(len(marital_status))
axs.bar(X+0.25, height = marital_status['high_count'], width=0.25, color='green', label='high',alpha=1);
axs.bar(X, height = marital_status['med_count'],  width=0.25, color='orange', label='medium',alpha=1);
axs.bar(X-0.25, height = marital_status['low_count'],  width=0.25, color='brown', label='low',alpha=1);
axs.set_title('Marital Status with respect to the famousness')
axs.set_xlabel('Marital Status')
axs.set_xticklabels([''] + list(marital_status.index))
axs.set_ylabel('Normalized Count (log)')
axs.set_yscale('log')
plt.legend()
plt.savefig('img/longevity/marital_status.png')
In [39]:
#We normalize the count with the number of characters in every category in order to have a fair comparison
gender_high = pd.DataFrame(pers_high["Gender"].drop(index=pers_high[pers_high["Gender"]=='Unknown'].index).value_counts(normalize=True))
gender_high.columns = ['high_count']
gender_low = pd.DataFrame(pers_low["Gender"].drop(index=pers_low[pers_low["Gender"]=='Unknown'].index).value_counts(normalize=True))
gender_low.columns = ['low_count']
gender_med = pd.DataFrame(pers_med["Gender"].drop(index=pers_med[pers_med["Gender"]=='Unknown'].index).value_counts(normalize=True))
gender_med.columns = ['med_count']
dfList = [gender_high, gender_low, gender_med]
gender = pd.concat(dfList, axis=1, join='outer', sort=True)\
                   .fillna(0)
gender = gender.sort_values(by='high_count', ascending=False)

fig, axs = plt.subplots(1, 1, figsize=(14,8))
X = np.arange(len(gender))
axs.bar(X+0.25, height = gender['high_count'], width=0.25, color='green', label='high');
axs.bar(X, height = gender['med_count'],  width=0.25, color='orange', label='medium');
axs.bar(X-0.25, height = gender['low_count'],  width=0.25, color='brown', label='low');
axs.set_title('Gender with respect to the famousness')
axs.set_xlabel('Gender')
axs.set_xticklabels([''] + list(gender.index))
axs.set_ylabel('Normalized Count (log)')
axs.set_yscale('log')
plt.legend()
plt.savefig('img/longevity/gender.png')

Transgender are quite new, that's why there have low rank in famousness

In [40]:
height_high = pers_high[(pers_high["Height"]!=np.nan)\
                          &(pers_high["Height"]<500)]['Height']
height_low = pers_low[(pers_low["Height"]!=np.nan)\
                          &(pers_low["Height"]<500)]['Height']
height_med = pers_med[(pers_med["Height"]!=np.nan)\
                                &(pers_med["Height"]<500)]['Height']

fig, axs = plt.subplots(1, 1, figsize=(14,8))
# To rectify the dominant class and still maintain the separateness of the distributions we normalize it by setting 
# density=True and stacked=True. By doing so, the total area under each distribution becomes 1.
kwargs = dict(alpha=0.2, bins=100, density=True, stacked=True)

axs.hist(height_high, **kwargs, color='green', label='high')
axs.hist(height_med, **kwargs, color='orange', label='medium')
axs.hist(height_low, **kwargs, color='brown', label='low')

axs.set_title('Probability Histogram of the Height')
axs.set_xlabel('Height in cm')
axs.set_ylabel('Probability')
plt.legend();
plt.savefig('img/longevity/height.png')
In [41]:
weight_high = pers_high[(pers_high["Weight"]!=np.nan)\
                          &(pers_high["Weight"]<500)]['Weight']
weight_low = pers_low[(pers_low["Weight"]!=np.nan)\
                          &(pers_low["Weight"]<500)]['Weight']
weight_med = pers_med[(pers_med["Weight"]!=np.nan)\
                                &(pers_med["Weight"]<500)]['Weight']

fig, axs = plt.subplots(1, 1, figsize=(14,8))
# To rectify the dominant class and still maintain the separateness of the distributions we normalize it by setting 
# density=True and stacked=True. By doing so, the total area under each distribution becomes 1.
kwargs = dict(alpha=0.3, bins=100, density=True, stacked=True)

axs.hist(weight_low, **kwargs, color='brown', label='low')
axs.hist(weight_med, **kwargs, color='orange', label='medium')
axs.hist(weight_high, **kwargs, color='green', label='high')
axs.set_title('Probability Histogram of the Weight')
axs.set_xlabel('Weight in kg')
axs.set_ylabel('Probability')
plt.legend();
plt.savefig('img/longevity/weight.png')

There are not many difference. The two picks are the good character, and the bad character

In [42]:
#Top 10 good characthers eyes dataframe
eyes_good_top10 = pd.DataFrame(pers_high['Eyes'].value_counts(normalize=True))
eyes_good_top10 = eyes_good_top10.reset_index()
eyes_good_top10.columns = ['Eyes', 'High']
#Top 10 bad characthers eyes dataframe
eyes_bad_top10 = pd.DataFrame(pers_low['Eyes'].value_counts(normalize=True))
eyes_bad_top10 = eyes_bad_top10.reset_index()
eyes_bad_top10.columns = ['Eyes', 'Low']
#Top 10 neutral characthers eyes dataframe
eyes_neutral_top10 = pd.DataFrame(pers_med['Eyes'].value_counts(normalize=True))
eyes_neutral_top10 = eyes_neutral_top10.reset_index()
eyes_neutral_top10.columns = ['Eyes', 'Medium']
#Join on Eyes
#eyes_neutral_top10.merge(eyes_good_top10,eyes_bad_top10, on='Eyes', how='outer')
dfList = [eyes_good_top10, eyes_bad_top10, eyes_neutral_top10]
dfs = [df.set_index('Eyes') for df in dfList]
eyes = pd.concat(dfs, axis=1, join='outer', sort=True) \
         .fillna(0) 
eyes.T

from math import pi
 
def make_spider(df):
    
    # initialize the figure
    my_dpi=100
    plt.figure(figsize=(1000/my_dpi, 1000/my_dpi), dpi=my_dpi)

    # Create a color palette:
    my_palette = plt.cm.get_cmap("Set2", len(df.index))

    # number of variable
    categories=list(df)[1:]
    N = len(categories)

    # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]
    
    for row in range(0, len(df.index)):
        # Initialise the spider plot
        ax = plt.subplot(2,2,row+1, polar=True, )

        # If you want the first axis to be on top:
        ax.set_theta_offset(pi / 2)
        ax.set_theta_direction(-1)

        # Draw one axe per variable + add labels labels yet
        plt.xticks(angles[:-1], categories, color='grey', size=8)

        # Draw ylabels
        ax.set_rlabel_position(0)
        list_ = np.round(np.linspace(0+0.05,round(np.max(df.max().values),1)-0.05,3),3)
        plt.yticks(list_, map(str,list_), color="grey", size=7)
        plt.ylim(0,round(np.max(df.max().values),1))

        # Ind1
        values=df.iloc[row].values.flatten().tolist()
        #print(sum(values))
        ax.plot(angles, values, color=my_palette(row), linewidth=2, linestyle='solid')
        ax.fill(angles, values, color=my_palette(row), alpha=0.4)

        # Add a title
        plt.title(df.index[row], size=11, color=my_palette(row), y=1.07)
        plt.savefig('img/longevity/eye.png')
        
#make_spider(eyes.T)
In [43]:
pickle.dump(marvel_pers, open('data_pickle/marvel_pers_final_2','wb'))
pickle.dump(dc_pers, open('data_pickle/dc_pers_final_2','wb'))

PLOT FOR WEBSITE

In [44]:
# Start froma beginning
marvel_pers = pd.read_pickle("data_pickle/marvel_pers_final_2")
dc_pers = pd.read_pickle("data_pickle/dc_pers_final_2")
In [45]:
######### THIS PLOT IS FOR AN EXEMPLE OF A SIMPLE PLOT, IT WILL NOT BE USED #######################

marvel_line = marvel_pers[(marvel_pers['Marital Status']!='Unknown')].explode('years').dropna(subset=['years']).groupby('years')['Marital Status'].value_counts(normalize=True)
dc_line = dc_pers[(dc_pers['Marital Status']!='Unknown')].explode('years').dropna(subset=['years']).groupby('years')['Marital Status'].value_counts(normalize=True)
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)



fig = make_subplots(rows=1, cols=2,subplot_titles=("Marvel", "DC Comics"))
color ='blue'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Single'] ,mode='lines',name='Single',line=dict(color=color, width=2)),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Single'] ,mode='lines',name='Single',line=dict(color=color, width=2),showlegend=False),row=1, col=2)
color ='red'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Married'] ,mode='lines',name='Married',line=dict(color=color, width=2)),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Married'] ,mode='lines',name='Married',line=dict(color=color, width=2),showlegend=False),row=1, col=2)
color ='green'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Widowed'] ,mode='lines',name='Widowed',line=dict(color=color, width=2)),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Widowed'] ,mode='lines',name='Widowed',line=dict(color=color, width=2),showlegend=False),row=1, col=2)
color ='orange'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Divorced'] ,mode='lines',name='Divorced',line=dict(color=color, width=2)),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Divorced'] ,mode='lines',name='Divorced',line=dict(color=color, width=2),showlegend=False),row=1, col=2)
color ='purple'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Separated'] ,mode='lines',name='Separated',line=dict(color=color, width=2)),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Separated'] ,mode='lines',name='Separated',line=dict(color=color, width=2),showlegend=False),row=1, col=2)
color ='cyan'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Engaged'] ,mode='lines',name='Engaged',line=dict(color=color, width=2)),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Engaged'] ,mode='lines',name='Engaged',line=dict(color=color, width=2),showlegend=False),row=1, col=2)
color ='yellow'
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Remarried'] ,mode='lines',name='Remarried',line=dict(color=color, width=2),showlegend=True),row=1, col=2)

fig.update_xaxes(title_text="Categories", row=1, col=1)
fig.update_xaxes(title_text="Categories", row=1, col=2)

fig.update_yaxes(title_text="Proportion", row=1, col=1)
fig.update_yaxes(title_text="Proportion", row=1, col=2)

fig.update_layout(barmode='group', 
                  title="Marital Status",
                  font=dict(family='Komika Hand',
                            size=10,
                            color="#7f7f7f"))

fig.show()
In [46]:
dc_pers['Citizenship'].value_counts().head(10)
Out[46]:
American                  6565
Unknown                   4310
British                    299
United Planets Citizen     254
Amazon                     185
German                     149
Apokoliptian               147
Atlantean                  123
Russian                    103
Japanese                    93
Name: Citizenship, dtype: int64
In [47]:
marvel_pers['Citizenship'].value_counts().head(10)
Out[47]:
American         10841
Unknown           7683
British            719
German             459
Canadian           324
Japanese           233
Russian            227
Skrull Empire      225
Chinese            186
Attilan            176
Name: Citizenship, dtype: int64
In [48]:
############################################################################################################################
##             This plot is the generalization of the previous plot, it is made by hand bacause it's the first
##             complicated plot made with Plotly, the other plots are more optimized
#######################################################################################################################################

fig = make_subplots(rows=1, cols=2, subplot_titles=("Marvel", "DC Comics"))

marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

######### 0: Number of character: 2 trace ########
marvel_line = marvel_explode['years'].value_counts().sort_index()
dc_line = dc_explode['years'].value_counts().sort_index()

fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line ,mode='lines',name='Marvel',line=dict(color="#990000", width=3),visible=False),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line ,mode='lines',name='DC Comics',line=dict(color="#0F4C81", width=3),visible=False),row=1, col=2)

##########  1:  MARITAL STATUS : 13 trace  ########

marvel_line = marvel_explode[(marvel_explode['Marital Status']!='Unknown')].groupby('years')['Marital Status'].value_counts(normalize=True)
dc_line = dc_explode[(dc_explode['Marital Status']!='Unknown')].explode('years').groupby('years')['Marital Status'].value_counts(normalize=True)
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='blue'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Single'] ,mode='lines',name='Single',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Single'] ,mode='lines',name='Single',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='red'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Married'] ,mode='lines',name='Married',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Married'] ,mode='lines',name='Married',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='green'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Widowed'] ,mode='lines',name='Widowed',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Widowed'] ,mode='lines',name='Widowed',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='orange'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Divorced'] ,mode='lines',name='Divorced',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Divorced'] ,mode='lines',name='Divorced',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='purple'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Separated'] ,mode='lines',name='Separated',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Separated'] ,mode='lines',name='Separated',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='cyan'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Engaged'] ,mode='lines',name='Engaged',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Engaged'] ,mode='lines',name='Engaged',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='yellow'
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Remarried'] ,mode='lines',name='Remarried',line=dict(color=color, width=2),showlegend=True,visible=False, legendgroup=color),row=1, col=2)


fig.update_xaxes(title_text="Year", row=1, col=1)
fig.update_xaxes(title_text="Year", row=1, col=2)


####### ###########  2:  GENDER    : 10 trace  #######################

marvel_line = marvel_explode[(marvel_explode['Gender']!='Unknown')].groupby('years')['Gender'].value_counts(normalize=True)
dc_line = dc_explode[(dc_explode['Gender']!='Unknown')].groupby('years')['Gender'].value_counts(normalize=True)
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='blue'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Male'] ,mode='lines',name='Male',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Male'] ,mode='lines',name='Male',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='magenta'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Female'] ,mode='lines',name='Female',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Female'] ,mode='lines',name='Female',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='grey'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Agender'] ,mode='lines',name='Agender',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Genderless'] ,mode='lines',name='Agender',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='orange'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Genderfluid'] ,mode='lines',name='Genderfluid',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
color ='brown'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Gestalt'] ,mode='lines',name='Gestalt',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
color ='cyan'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Transgender'] ,mode='lines',name='Transgender',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Transgender'] ,mode='lines',name='Transgender',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

####### ####### ####### 3:    Behavior:    6 trace #########################

marvel_line = marvel_explode.groupby('years')['Behavior'].value_counts(normalize=True)
dc_line = dc_explode.groupby('years')['Behavior'].value_counts(normalize=True)
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='green'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Good'] ,mode='lines',name='Good',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Good'] ,mode='lines',name='Good',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='orange'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Neutral'] ,mode='lines',name='Neutral',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Neutral'] ,mode='lines',name='Neutral',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)
color ='black'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line['Bad'] ,mode='lines',name='Evil',line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Bad'] ,mode='lines',name='Evil',line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)


######### ###### #### 4: Citizenship:   12 traces  #######################

#only take the 6 biggest nation represented: American, British, German, Canadian, Japanese, Russian
marvel_line = marvel_explode.groupby('years')['Citizenship'].value_counts()
dc_line = dc_explode.groupby('years')['Citizenship'].value_counts()
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='blue'; category = 'American'; label = 'American'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='blueviolet'; category = 'British'; label = 'British'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='black'; category = 'German'; label = 'German'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='navy'; category = 'Chinese'; label = 'Chinese'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='brown'; category = 'Japanese'; label = 'Japanese'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='red'; category = 'Russian'; label = 'Russian'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)


#### #### #### 5: Occupation:  12 traces  #########################

#only take 6 more represented occupation: Student, Criminal, Scientist, Adventurer, Mercenary, Soldier
attribut = 'Occupation'
marvel_line = marvel_explode.groupby('years')[attribut].value_counts()
dc_line = dc_explode.groupby('years')[attribut].value_counts()
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='blue'; category = 'Student'; label = 'Student'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='red'; category = 'Criminal'; label = 'Criminal'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='green'; category = 'Scientist'; label = 'Scientist'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='magenta'; category = 'Adventurer'; label = 'Adventurer'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='cyan'; category = 'Mercenary'; label = 'Mercenary'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='orange'; category = 'Soldier'; label = 'Soldier'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

##### ######  ###### 6. Height:    6 traces #######################
median_marvel = marvel_explode.dropna(subset=['Height in float']).groupby('years')['Height in float'].median()
low_quar_marvel = marvel_explode.dropna(subset=['Height in float']).groupby('years')['Height in float'].quantile(0.25)
high_quar_marvel = marvel_explode.dropna(subset=['Height in float']).groupby('years')['Height in float'].quantile(0.75)

median_dc = dc_explode.dropna(subset=['Height in float']).groupby('years')['Height in float'].median()
low_quar_dc = dc_explode.dropna(subset=['Height in float']).explode('years').groupby('years')['Height in float'].quantile(0.25)
high_quar_dc = dc_explode.dropna(subset=['Height in float']).explode('years').groupby('years')['Height in float'].quantile(0.75)

fig.add_trace(go.Scatter(x=low_quar_marvel.index, y=low_quar_marvel,fill=None,mode='lines',line_color='#990000',name='Low quartile',visible=False),row=1, col=1)
fig.add_trace(go.Scatter(x=high_quar_marvel.index,y=high_quar_marvel,fill='tonexty', mode='lines', line_color='#990000',name='High quartile',visible=False),row=1, col=1)
fig.add_trace(go.Scatter(x=median_marvel.index,y=median_marvel,mode='lines', line_color='whitesmoke',name='Median',visible=False),row=1, col=1)

fig.add_trace(go.Scatter(x=low_quar_dc.index, y=low_quar_dc,fill=None,mode='lines',line_color='#0F4C81',name='Low quartile',showlegend=True,visible=False),row=1, col=2)
fig.add_trace(go.Scatter(x=high_quar_dc.index,y=high_quar_dc,fill='tonexty', mode='lines', line_color='#0F4C81',name='High quartile',showlegend=True,visible=False),row=1, col=2)
fig.add_trace(go.Scatter(x=median_dc.index,y=median_dc,mode='lines', line_color='whitesmoke',name='Median',showlegend=False,visible=False),row=1, col=2)


##### ######  ###### 7. Weight:    6 traces #######################

median_marvel = marvel_explode.dropna(subset=['Weight in float']).groupby('years')['Weight in float'].median()
low_quar_marvel = marvel_explode.dropna(subset=['Weight in float']).explode('years').groupby('years')['Weight in float'].quantile(0.25)
high_quar_marvel = marvel_explode.dropna(subset=['Weight in float']).explode('years').groupby('years')['Weight in float'].quantile(0.75)

median_dc = dc_explode.dropna(subset=['Weight in float']).explode('years').groupby('years')['Weight in float'].median()
low_quar_dc = dc_explode.dropna(subset=['Weight in float']).explode('years').groupby('years')['Weight in float'].quantile(0.25)
high_quar_dc = dc_explode.dropna(subset=['Weight in float']).explode('years').groupby('years')['Weight in float'].quantile(0.75)

fig.add_trace(go.Scatter(x=low_quar_marvel.index, y=low_quar_marvel,fill=None,mode='lines',line_color='#990000',name='Low quartile',visible=False, legendgroup='low'),row=1, col=1)
fig.add_trace(go.Scatter(x=high_quar_marvel.index,y=high_quar_marvel,fill='tonexty', mode='lines', line_color='#990000',name='High quartile',visible=False, legendgroup='high'),row=1, col=1)
fig.add_trace(go.Scatter(x=median_marvel.index,y=median_marvel,mode='lines', line_color='whitesmoke',name='Median',visible=False, legendgroup='med'),row=1, col=1)

fig.add_trace(go.Scatter(x=low_quar_dc.index, y=low_quar_dc,fill=None,mode='lines',line_color='#0F4C81',name='Low quartile',showlegend=True,visible=False, legendgroup='low'),row=1, col=2)
fig.add_trace(go.Scatter(x=high_quar_dc.index,y=high_quar_dc,fill='tonexty', mode='lines', line_color='#0F4C81',name='High quartile',showlegend=True,visible=False, legendgroup='high'),row=1, col=2)
fig.add_trace(go.Scatter(x=median_dc.index,y=median_dc,mode='lines', line_color='whitesmoke',name='Median',showlegend=False,visible=False, legendgroup='med'),row=1, col=2)


#####################   8: EYES:    12 traces    ############

#only take 6 more represented occupation:  
attribut = 'Eyes'
marvel_line = marvel_explode.groupby('years')[attribut].value_counts()
dc_line = dc_explode.groupby('years')[attribut].value_counts()
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='brown'; category = 'Brown'; label = 'Brown'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='blue'; category = 'Blue'; label = 'Blue'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='black'; category = 'Black'; label = 'Black'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='green'; category = 'Green'; label = 'Green'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='red'; category = 'Red'; label = 'Red'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='white'; category = 'White'; label = 'White'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

#####################   9: Hair:    12 traces    ############

#only take 6 more represented occupation: 
attribut = 'Hair'
marvel_line = marvel_explode.groupby('years')[attribut].value_counts()
dc_line = dc_explode.groupby('years')[attribut].value_counts()
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)

color ='brown'; category = 'Brown'; label = 'Brown'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='pink'; category = 'Bald'; label = 'Bald'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='black'; category = 'Black'; label = 'Black'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='yellow'; category = 'Blond'; label = 'Blond'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='red'; category = 'Red'; label = 'Red'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)

color ='white'; category = 'White'; label = 'White'
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),visible=False, legendgroup=color),row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category] ,mode='lines',name=label,line=dict(color=color, width=2),showlegend=False,visible=False, legendgroup=color),row=1, col=2)




# Initialize the axis
fig.update_xaxes(title_text="Year", row=1, col=1)
fig.update_xaxes(title_text="Year", row=1, col=2)

#fig.update_yaxes(title_text="Proportion", row=1, col=1)
#fig.update_yaxes(title_text="Proportion", row=1, col=2)

#intialize title and font
fig.update_layout(barmode='group', 
                  title=dict(text="General analysis", 
                             font= {'family':'Komika Hand', 
                                      'color':'#7f7f7f', 
                                      'size':20}, 
                             x=0, 
                             xanchor='left', 
                             y=0.95, 
                             yanchor='top'),
                  font=dict(family='Komika Hand',
                            size=10,
                            color="#7f7f7f"))

# Make the first traces visible
for i in range(2):
    fig.data[i].visible = True

#### BUTTTOONNN####
# create the filters
visible_number= [True]*2 + [False]*13 + [False]*10 + [False]*6 + [False]*12 + [False]*12 + [False]*6 + [False]*6 + [False]*12 + [False]*12
visible_marital= [False]*2 + [True]*13 + [False]*10 + [False]*6 + [False]*12 + [False]*12 + [False]*6 + [False]*6 + [False]*12 + [False]*12
visible_gender = [False]*2 + [False]*13 + [True]*10 + [False]*6 + [False]*12 + [False]*12 + [False]*6 + [False]*6 + [False]*12 + [False]*12
visible_behavior = [False]*2 + [False]*13 + [False]*10 + [True]*6 + [False]*12 + [False]*12 + [False]*6 + [False]*6 + [False]*12 + [False]*12
visible_citizen = [False]*2 + [False]*13 + [False]*10 + [False]*6 + [True]*12 + [False]*12 + [False]*6 + [False]*6 + [False]*12 + [False]*12
visible_occupation = [False]*2 + [False]*13 + [False]*10 + [False]*6 + [False]*12 + [True]*12 + [False]*6 + [False]*6 + [False]*12 + [False]*12
visible_height = [False]*2 + [False]*13 + [False]*10 + [False]*6 + [False]*12 + [False]*12 + [True]*6 + [False]*6 + [False]*12 + [False]*12
visible_weight = [False]*2 + [False]*13 + [False]*10 + [False]*6 + [False]*12 + [False]*12 + [False]*6 + [True]*6 + [False]*12 + [False]*12
visible_eyes = [False]*2 + [False]*13 + [False]*10 + [False]*6 + [False]*12 + [False]*12 + [False]*6 + [False]*6 + [True]*12 + [False]*12
visible_hair = [False]*2 + [False]*13 + [False]*10 + [False]*6 + [False]*12 + [False]*12 + [False]*6 + [False]*6 + [False]*12 + [True]*12

#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)

#name of axis
prop = [dict(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper"),
         dict(x=0.493,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper")]

yHeight = [dict(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Height [cm]",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper"),
         dict(x=0.493,
              y=0.5,
              showarrow=False,
              text="Height [cm]",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper")]
yWeight = [dict(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Weight [kg]",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper"),
         dict(x=0.49,
              y=0.5,
              showarrow=False,
              text="Weight [kg]",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper")]

numb = [dict(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Number",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper"),
         dict(x=0.493,
              y=0.5,
              showarrow=False,
              text="Number",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper")]

#Add first legend
fig.update_layout(annotations=[go.layout.Annotation(x=-0.07,
                                          y=0.5,
                                          showarrow=False,
                                          text="Number",
                                          font=dict(size=13),
                                          textangle=-90,
                                          xref="paper",
                                          yref="paper"),
                               go.layout.Annotation(x=0.493,
                                          y=0.5,
                                          showarrow=False,
                                          text="Number",
                                          font=dict(size=13),
                                          textangle=-90,
                                          xref="paper",
                                          yref="paper")])
                               






# apply filters
fig.update_layout(
    updatemenus=[
        #category button
        go.layout.Updatemenu(
            active=0,
            pad={"r": 10, "t": 10},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list([
                dict(label="Number",
                     method="update",
                     args=[{"visible": visible_number},
                           {"annotations":numb}]),
                dict(label="Marital Status",
                     method="update",
                     args=[{"visible": visible_marital},
                           {"annotations": prop}]),
                dict(label="Gender",
                     method="update",
                     args=[{"visible": visible_gender},
                           {"annotations":prop}]),
                dict(label="Behavior",
                     method="update",
                     args=[{"visible": visible_behavior},
                           {"annotations":prop}]),
                dict(label="Citizenship",
                     method="update",
                     args=[{"visible": visible_citizen},
                           {"annotations":numb}]),
                dict(label="Occupation",
                     method="update",
                     args=[{"visible": visible_occupation},
                           {"annotations":numb}]),
                dict(label="Height",
                     method="update",
                     args=[{"visible": visible_height},
                           {"annotations":yHeight}]),
                dict(label="Weight",
                     method="update",
                     args=[{"visible": visible_weight},
                           {"annotations":yWeight}]),
                dict(label="Eyes",
                     method="update",
                     args=[{"visible": visible_eyes},
                           {"annotations":numb}]),
                dict(label="Hair",
                     method="update",
                     args=[{"visible": visible_hair},
                           {"annotations":numb}])
                ]),
        ),
        #linlog menu
        go.layout.Updatemenu(
            active = 0,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'yaxis2': lin2}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'yaxis2': log2}])
            ])
    ])



#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename='general_caract_line.html', auto_open=False,)
#print(url)

# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
#print('Save graph to clipboard')
#os.system("echo '%s' | pbcopy" % str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
fig.show()

11 General plots along the Time

In [49]:
category = 'Marital Status'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url

pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
In [50]:
category = 'Gender'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
In [51]:
def top_characteristics(dc_df, marvel_df, characteristic = '', top=10, year=False):
    '''
    This function return a list of the top categories for a characteristic
    dc_df: DC dataframe
    dc_marvel: Marvel dataframe
    characteristic: ['Citizenship', 'Marital Status', 'Occupation', 'Education', 'Gender','Eyes', 'Hair', 'Place of Birth']
    '''
    if year:
        marvel_df = marvel_df.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL']) 
        dc_df = dc_df.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL']) 
        dc_df = dc_df[dc_df['years']==year]
        marvel_df = marvel_df[marvel_df['years']==year]
    
    subdc = pd.DataFrame(dc_df[characteristic].value_counts())
    if 'Unknown' in subdc.index:
        subdc = subdc.drop('Unknown') 
    subdc = subdc.reset_index()
    subdc['Scores'] = subdc.index
    subdc = subdc.set_index('index')
    
    submarvel = pd.DataFrame(marvel_df[characteristic].value_counts())
    if 'Unknown' in submarvel.index:
        submarvel = submarvel.drop('Unknown') 
    submarvel = submarvel.reset_index()
    submarvel['Scores'] = submarvel.index
    submarvel = submarvel.set_index('index')
    
    dc_marvel = submarvel.join(subdc, how='inner', lsuffix='_Marvel', rsuffix='_DC')
    dc_marvel['Scores'] = dc_marvel['Scores_Marvel'] + dc_marvel['Scores_DC']
    dc_marvel = dc_marvel.sort_values('Scores', ascending=True)
    return dc_marvel.iloc[:top].index.tolist()
In [52]:
#The above function is so slow that we have to create a dictionary of it.. to loose time just once
try:
    top_label[(2000,'Gender')]
except:
    try:
        top_label = pd.read_pickle("data_pickle/top_characteristic.pkl")
    except:
        #from function of Pilou, make a dictionary
        category = {0:'Gender',
                    1:'Marital Status', 
                    2:'Citizenship',
                    3:'Occupation',
                    4:'Education',
                    5:'First_apparition',
                    6:'Height in float',
                   7:'Weight in float',
                   8:'Eyes',
                   9:'Hair',
                   10:'Behavior'}
        years = range(1930,2021)
        top_label = dict([])
        for year in years:
            for i in range(11):
                top_label[(category[i],year)]=top_characteristics(dc_pers, marvel_pers, characteristic = category[i], top=6, year=year)

        f = open("data_pickle/top_characteristic.pkl","wb")
        pickle.dump(top_label,f)
        f.close()
        
In [53]:
[top_label[('Citizenship',1939)]]
Out[53]:
[['American', 'Chinese']]
In [54]:
category = 'Citizenship'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()[top_label[(category,year)]]
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()[top_label[(category,year)]]

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/series.py:1152: FutureWarning:


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike

In [55]:
category = 'Occupation'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()[top_label[(category,year)]]
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()[top_label[(category,year)]]

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/series.py:1152: FutureWarning:


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike

In [56]:
category = 'Education'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()[top_label[(category,year)]]
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()[top_label[(category,year)]]

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/series.py:1152: FutureWarning:


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike

In [57]:
category = 'First_apparition'
name= 'First appearance'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True).sort_index()
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True).sort_index()
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts().sort_index()
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts().sort_index()

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [name,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[name,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=name,
                  xaxis_title="Year of creation of the character",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/ops/__init__.py:1115: FutureWarning:

elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison

In [58]:
from scipy.stats import norm
category = 'Height in float'
name= 'Height'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True).sort_index()
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True).sort_index()
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts().sort_index()
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts().sort_index()
    
    mu_marvel, std_marvel = norm.fit(hist_marvel)
    mu_dc, std_dc = norm.fit(hist_dc)
    
    x = np.linspace(1930, 2020, 100)
    p_marvel = norm.pdf(x, mu_marvel, std_marvel)
    p_dc = norm.pdf(x, mu_dc, std_dc)
    
    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_scatter(name='Marvel', x=x1, y=hist_marvel, mode='markers',
                #text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{hovertext:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [name,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_scatter(name='DC Comic', x=x2, y=hist_dc, mode='markers',
                #text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{hovertext:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[name,year],
                visible=False, marker=dict(color="#0F4C81"))
    

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=name,
                  xaxis_title="Height [cm]",
                  xaxis_range=[0,400],
                  yaxis_type="linear",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 0,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=name+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/ops/__init__.py:1115: FutureWarning:

elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison

/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/scipy/stats/_continuous_distns.py:268: RuntimeWarning:

Mean of empty slice.

/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/numpy/core/_methods.py:85: RuntimeWarning:

invalid value encountered in double_scalars

/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/scipy/stats/_continuous_distns.py:273: RuntimeWarning:

Mean of empty slice.

/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py:897: RuntimeWarning:

invalid value encountered in greater_equal

/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py:897: RuntimeWarning:

invalid value encountered in less_equal

In [59]:
category = 'Weight in float'
name= 'Weight'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True).sort_index()
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True).sort_index()
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts().sort_index()
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts().sort_index()

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_scatter(name='Marvel', x=x1, y=hist_marvel, mode='markers',
                #text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{hovertext:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [name,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_scatter(name='DC Comic', x=x2, y=hist_dc, mode='markers',
                #text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{hovertext:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[name,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=name,
                  xaxis_title="Weight [kg]",
                  xaxis_range=[0,400],
                  yaxis_type="linear",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 0,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=name+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/ops/__init__.py:1115: FutureWarning:

elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison

In [60]:
category = 'Eyes'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()[top_label[(category,year)]]
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()[top_label[(category,year)]]

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
/Users/jordanmetz/anaconda3/envs/ada/lib/python3.7/site-packages/pandas/core/series.py:1152: FutureWarning:


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike

In [61]:
category = 'Hair'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)[top_label[(category,year)]]
    
    count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()[top_label[(category,year)]]
    count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()[top_label[(category,year)]]

    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
In [62]:
category = 'Behavior'
#create figure
fig = go.Figure()
years = range(1930,2020)
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)

#Along the time
for year in years:
    #take statistics
    if year > 1940:
        hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)[['Good','Neutral','Bad']]
        hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)[['Good','Neutral','Bad']]

        count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()[['Good','Neutral','Bad']]
        count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()[['Good','Neutral','Bad']]
    else:
        hist_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts(normalize=True)
        hist_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts(normalize=True)

        count_marvel = marvel_explode[(marvel_explode[category]!='Unknown')&(marvel_explode['years']==year)][category].value_counts()
        count_dc = dc_explode[(dc_explode[category]!='Unknown')&(dc_explode['years']==year)][category].value_counts()
    
        
    x1 = hist_marvel.index
    x2 = hist_dc.index
    
    fig.add_bar(name='Marvel', x=x1, y=hist_marvel, 
                text=count_marvel, texttemplate = '<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta = [category,year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=x2, y=hist_dc, 
                text=count_dc, texttemplate = '<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate = '<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category,year],
                visible=False, marker=dict(color="#0F4C81"))

# Make 2019 th trace visible
fig.data[178].visible = True
fig.data[179].visible = True

# Change the bar mode


# Create and add slider
steps = []
for i in range(int(len(fig.data)/2)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
    )
    step["args"][1][i*2] = True  # Toggle i'th trace to "visible"
    step["args"][1][i*2+1] = True  # Toggle i'th trace to "visible"
    step["label"]=str(1930+i)
    #if i == 0:
    #    step['label'] = 'Global trend'
    #else:
    #    step['label']=str(1930+i)
    steps.append(step)
    

sliders = [dict(
    active=89,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

fig.update_layout(
    sliders=sliders
)

fig.update_layout(barmode='group', 
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))
#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)
# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Proportion",
              font=dict(size=12),
              textangle=-90,
              xref="paper",
              yref="paper")]
fig.update_layout(annotations=ylabel)

fig.update_layout(
    updatemenus=[
        #linlog menu
        go.layout.Updatemenu(
            active = 1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'annotations': ylabel}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Copy the above url in the get_embde method, and give the output to antoine in order to put it on the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
############################################################################################################### # This plot doesn't Work, we can't have buttons AND sliders ############################################################################################################### category = {0:'Gender', 1:'Marital Status', 2:'Citizenship', 3:'Occupation', 4:'Education', 5:'First_apparition', 6:'Height in float', 7:'Weight in float', 8:'Eyes', 9:'Hair', 10:'Behavior'} label = {0:'Gender', 1:'Marital Status', 2:'Citizenship', 3:'Occupation', 4:'Education', 5:'First apparition', 6:'Height in float', 7:'Weight in float', 8:'Eyes', 9:'Hair', 10:'Behavior'} #create figure fig = go.Figure() years = range(1995,2005) n_year = len(years) marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL']) dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL']) #Along the time for i in range(10): for year in years: #take statistics hist_marvel = marvel_explode[(marvel_explode['years']==year)&marvel_explode[category[i]]!='Unknown'][category[i]].value_counts(normalize=True)[top_label[(category[i],year)]] hist_dc = dc_explode[(dc_explode['years']==year)&dc_explode[category[i]]!='Unknown'][category[i]].value_counts(normalize=True)[top_label[(category[i],year)]] count_marvel = marvel_explode[(marvel_explode['years']==year)&marvel_explode[category[i]]!='Unknown'][category[i]].value_counts()[top_label[(category[i],year)]] count_dc = dc_explode[(dc_explode['years']==year)&dc_explode[category[i]]!='Unknown'][category[i]].value_counts()[top_label[(category[i],year)]] x1 = hist_marvel.index x2 = hist_dc.index fig.add_bar(name='Marvel', x=x1, y=hist_marvel, text=count_marvel, texttemplate = 'Marvel:
%{label}:
%{text:s}
(%{y:%.1f})', textposition='auto', textangle=0, hovertemplate = '%{meta[0]}
%{meta[1]}


Marvel:
%{x}: %{text:s} (%{y:%.1f})', hovertext=count_marvel, meta = [category,year], visible=False, marker=dict(color="#990000")) fig.add_bar(name='DC Comic', x=x2, y=hist_dc, text=count_dc, texttemplate = 'DC:
%{label}:
%{text:s}
(%{y:%.1f})', textposition='auto', textangle=0, hovertemplate = '%{meta[0]}
%{meta[1]}


DC:
%{x}: %{text:s} (%{y:%.1f})', hovertext=count_dc, meta=[category,year], visible=False, marker=dict(color="#0F4C81")) def trace(cat,y, id_=0): idx = ((y-min(years))+len(years)*cat)*2 if id_ == 0: return idx,idx+1 if id_ == 1: return idx if id_ == 2: return idx+1 # Make the year 2000 trace visible idx1, idx2 = trace(0,2000) fig.data[idx1].visible = True fig.data[idx2].visible = True # Change the bar mode # Create and add slider step_cat = dict([]) for cat in range(10): steps = [] for year in years: step = dict( method="restyle", args=[{"visible":[False] * len(fig.data) }], ) step["args"][0]["visible"][trace(cat,year,1)] = True # Toggle i'th trace to "visible" step["args"][0]["visible"][trace(cat,year,2)] = True # Toggle i'th trace to "visible" step["label"]=str(year) #if i == 0: # step['label'] = 'Global trend' #else: # step['label']=str(1930+i) steps.append(step) step_cat[cat] = steps sliders = dict([]) for cat in range(10): sliders[cat] = [dict( active=5, currentvalue={"prefix": "Year: "}, pad={"t": 20}, steps=step_cat[cat], )] fig.update_layout( sliders=sliders[0] ) fig.update_layout(barmode='group', title="Marital Status", xaxis_title="Categories", yaxis_title="Proportion in log", font=dict(family='Komika Hand', size=11, color="#7f7f7f")) #Menu #lin or log scale button lin1 = go.layout.YAxis(visible = True, type='linear') lin2 = go.layout.YAxis(visible = True, type='linear', anchor='free', position=0.55) log1 = go.layout.YAxis(visible = True, type='log') log2 = go.layout.YAxis(visible = True, type='log', anchor='free', position=0.55) #name of axis prop = [dict(x=-0.07, y=0.5, showarrow=False, text="Proportion", font=dict(size=13), textangle=-90, xref="paper", yref="paper")] yHeight = [dict(x=-0.07, y=0.5, showarrow=False, text="Height [cm]", font=dict(size=13), textangle=-90, xref="paper", yref="paper")] yWeight = [dict(x=-0.07, y=0.5, showarrow=False, text="Weight [kg]", font=dict(size=13), textangle=-90, xref="paper", yref="paper")] numb = [dict(x=-0.07, y=0.5, showarrow=False, text="Number", font=dict(size=13), textangle=-90, xref="paper", yref="paper")] # visible when pressing visible = dict([]) for cat in range(10): visible[cat]=[False]*len(fig.data) visible[cat][trace(cat,2000,1)]=True visible[cat][trace(cat,2000,2)]=True # apply filters fig.update_layout( updatemenus=[ #category button go.layout.Updatemenu( active=0, pad={"r": 10, "t": 10}, x=-0.22, y=1.15, xanchor='left', yanchor='top', buttons=list([ dict(label=category[0], method="update", args=[{"visible":visible[0], "overwrite":True, "sliders":sliders[0]}, {"annotations":numb, "overwrite": True, "sliders":sliders[0]}]), dict(label=category[1], method="update", args=[{"visible":visible[1], "overwrite":True, "sliders":sliders[1]}, {"annotations": prop, "overwrite": True, "sliders":sliders[1]}]), dict(label=category[2], method="update", args=[{"visible":visible[2], "overwrite":True, "sliders":sliders[2]}, {"annotations":prop, "overwrite": True, "sliders":sliders[2]}]), dict(label=category[3], method="update", args=[{"visible":visible[3], "overwrite":True, "sliders":sliders[3]}, {"annotations":prop, "overwrite": True, "sliders":sliders[3]}]), dict(label=category[4], method="update", args=[{"visible":visible[4], "overwrite":True, "sliders":sliders[4]}, {"annotations":numb, "overwrite": True, "sliders":sliders[4]}]), dict(label=category[5], method="update", args=[{"visible":visible[5], "overwrite":True, "sliders":sliders[5]}, {"annotations":numb, "overwrite": True, "sliders":sliders[5]}]), dict(label=category[6], method="update", args=[{"visible":visible[6], "overwrite":True, "sliders":sliders[6]}, {"annotations":yHeight, "overwrite":True, "sliders":sliders[6]}]), dict(label=category[7], method="update", args=[{"visible":visible[7], "overwrite":True, "sliders":sliders[7]}, {"annotations":yWeight, "overwrite":True, "sliders":sliders[7]}]), dict(label=category[8], method="update", args=[{"visible":visible[8], "overwrite":True, "sliders":sliders[8]}, {"annotations":numb, "overwrite":True, "sliders":sliders[8]}]), dict(label=category[9], method="update", args=[{"visible":visible[9], "overwrite":True, "sliders":sliders[9]}, {"annotations":numb, "overwrite":True, "sliders":sliders[9]}]) ]), ), #linlog menu go.layout.Updatemenu( active = 0, x=1, y=1.2, pad={"r": 10, "t": 10}, xanchor='right', yanchor='top', buttons=[ dict(label='Lin-scale', method='relayout', args=[{'yaxis': lin1}]), dict(label='Log-scale', method='relayout', args=[{'yaxis': log1}]) ]) ]) #fig.show() #This part save the plot chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO') url = py.plot(fig, filename='Category_and_slide.html', auto_open=False,) print(url)

Diversity

In [63]:
category = {0:'First_apparition',
            1:'Marital Status', 
            2:'Citizenship',
            3:'Occupation',
            4:'Education',
            5:'Gender',
            6:'Height in float',
           7:'Weight in float',
           8:'Eyes',
           9:'Hair',
           10:'Behavior'}

label = {0:'New character',
            1:'Marital Status', 
            2:'Citizenship',
            3:'Occupation',
            4:'Education',
            5:'Gender',
            6:'Height in float',
           7:'Weight in float',
           8:'Eyes',
           9:'Hair',
           10:'Behavior'}


fig = go.Figure()

marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
marvel_explode = marvel_explode.drop(index=marvel_explode[marvel_explode['years']==2020].index)
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_explode.drop(index=dc_explode[dc_explode['years']==2020].index)
visible={}
buttons=[]

# ylabel 
ylabel = [go.layout.Annotation(x=-0.07,
              y=0.5,
              showarrow=False,
              text="Number of categories",
              font=dict(size=13),
              textangle=-90,
              xref="paper",
              yref="paper")]

#Add annotation
new_charac = [go.layout.Annotation(x=1938, y=39, xref="x", yref="y", text="Super-Man",
                                   font=dict(family="Komika Hand",size=15,color="#0F4C81"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-100),
              go.layout.Annotation(x=1941, y=360, xref="x", yref="y", text="Captain America",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-40),
              go.layout.Annotation(x=1962, y=201, xref="x", yref="y", text="Spider-Man",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-40),
              go.layout.Annotation(x=1963, y=221, xref="x", yref="y", text="Iron-Man\n Avengers",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-60),
              go.layout.Annotation(x=1974, y=455, xref="x", yref="y", text="Wolverine",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-40),
              go.layout.Annotation(x=1939, y=40, xref="x", yref="y", text="Batman",
                                   font=dict(family="Komika Hand",size=15,color="#0F4C81"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-80),
              go.layout.Annotation(x=1991, y=587, xref="x", yref="y", text="Deadpool",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-80),
              go.layout.Annotation(x=1973, y=435, xref="x", yref="y", text="Thanos",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-100),
              go.layout.Annotation(x=1961, y=115, xref="x", yref="y", text="Atom",
                                   font=dict(family="Komika Hand",size=15,color="#0F4C81"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-20),
              go.layout.Annotation(x=1980, y=368, xref="x", yref="y", text="She-Hulk",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-20),
              go.layout.Annotation(x=1959, y=64, xref="x", yref="y", text="Super Girl",
                                   font=dict(family="Komika Hand",size=15,color="#0F4C81"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-20),
              go.layout.Annotation(x=1998, y=395, xref="x", yref="y", text="Spider-Girl",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-20)
             ]
new_citiz = [go.layout.Annotation(x=1968, y=82, xref="x", yref="y", text="First Swiss",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-40)]
new_gender = [go.layout.Annotation(x=2002, y=5, xref="x", yref="y", text="First Transgender",
                                   font=dict(family="Komika Hand",size=15,color="white"), showarrow=True,
                                   arrowhead=7, ax=0, ay=120),
              go.layout.Annotation(x=1949, y=3, xref="x", yref="y", text="First Genderfluid / Loki",
                                   font=dict(family="Komika Hand",size=15,color="#990000"), showarrow=True,
                                   arrowhead=7, ax=0, ay=-60)]

for i in range(10):
    if i ==0:
        marvel_line = marvel_pers['First_apparition'].value_counts().sort_index()
        dc_line = dc_pers['First_apparition'].value_counts().drop(index=2020).sort_index()
    elif category[i]=='Citizenship':
        #split citizenship
        marvel_explode['Citizenship'] = marvel_explode['Citizenship'].apply(lambda s: list(s.split(',')))
        marvel_line = marvel_explode.explode('Citizenship').dropna(subset=['Citizenship']).drop_duplicates(subset=['years','URL','Citizenship']).groupby('years')[category[i]].value_counts()
        marvel_line = marvel_line.unstack(level=1).count(axis=1).sort_index()
        
        dc_explode['Citizenship'] = dc_explode['Citizenship'].apply(lambda s: list(s.split(',')))
        dc_line = dc_explode.explode('Citizenship').dropna(subset=['Citizenship']).drop_duplicates(subset=['years','URL','Citizenship']).groupby('years')[category[i]].value_counts()
        dc_line = dc_line.unstack(level=1).count(axis=1).sort_index()
        
    else:
        marvel_line = marvel_explode.groupby('years')[category[i]].value_counts()
        marvel_line = marvel_line.unstack(level=1).count(axis=1).sort_index()
    
        dc_line = dc_explode.groupby('years')[category[i]].value_counts()
        dc_line = dc_line.unstack(level=1).count(axis=1).sort_index()
    
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line ,mode='lines',name='Marvel',line=dict(color="#990000", width=3),visible=False, fill='tozeroy'))
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line ,mode='lines',name='DC Comic',line=dict(color="#0F4C81", width=3),visible=False, fill='tozeroy'))
    visible[i] = [False]*20
    visible[i][2*i] = True
    visible[i][2*i+1] = True
    
    if i==0:
        buttons.append(dict(label=label[i],
                                  method="update",
                                  args=[{"visible": visible[i]},
                                   {"title": 'Diversity analysis',
                                    'font':dict(family='Komika Hand',
                                                size=11,
                                                color="#7f7f7f"),
                                    "annotations":ylabel+new_charac}]))
    elif category[i]=='Citizenship':
        buttons.append(dict(label=label[i],
                                  method="update",
                                  args=[{"visible": visible[i]},
                                   {"title": 'Diversity analysis',
                                    'font':dict(family='Komika Hand',
                                                size=11,
                                                color="#7f7f7f"),
                                    "annotations":ylabel+new_citiz}]))
    elif category[i]=='Gender':
        buttons.append(dict(label=label[i],
                                  method="update",
                                  args=[{"visible": visible[i]},
                                   {"title": 'Diversity analysis',
                                    'font':dict(family='Komika Hand',
                                                size=11,
                                                color="#7f7f7f"),
                                    "annotations":ylabel+new_gender}]))
             
    else:
        buttons.append(dict(label=label[i],
                              method="update",
                              args=[{"visible": visible[i]},
                               {"title": 'Diversity analysis',
                                'font':dict(family='Komika Hand',
                                            size=11,
                                            color="#7f7f7f"),
                                "annotations":ylabel}]))
              
# Initialize the axis
fig.update_xaxes(title_text="Year")
#fig.update_yaxes(title_text="Number of categories")


#fig.update_yaxes(title_text="Proportion", row=1, col=1)
#fig.update_yaxes(title_text="Proportion", row=1, col=2)

#intialize title and font
fig.update_layout(barmode='group', 
                  title=dict(text="Diversity analysis", 
                             font= {'family':'Komika Hand', 
                                      'color':'#7f7f7f', 
                                      'size':20}, 
                             x=0, 
                             xanchor='left', 
                             y=0.95, 
                             yanchor='top'),
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))

# Make the first traces visible
for i in range(2):
    fig.data[i].visible = True

#### BUTTTOONNN####

#lin or log scale button
lin1 = go.layout.YAxis(visible = True,
                      type='linear')
lin2 = go.layout.YAxis(visible = True,
                      type='linear',
                      anchor='free',
                      position=0.55)
log1 = go.layout.YAxis(visible = True,
                      type='log')
log2 = go.layout.YAxis(visible = True,
                      type='log',
                      anchor='free',
                      position=0.55)


#Add first legend
#fig.add_annotation(ylabel+new_charac)
fig.update_layout(annotations=ylabel+new_charac)
                               



# apply filters
fig.update_layout(
    updatemenus=[
        #category button
        go.layout.Updatemenu(
            active=0,
            pad={"r": 10, "t": 10},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list(buttons),
        ),
        #linlog menu
        go.layout.Updatemenu(
            active = 0,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                           'yaxis2': lin2}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                           'yaxis2': log2}])
            ])
    ])

fig.show()

#This part save the plot
#chart_studio.tools.set_credentials_file(username='Ahko26', api_key='VBmVVv93RzXI5FVJdRj8')
#url = py.plot(fig, filename='Diversity.html', auto_open=False,)
#print(url)
#tls.get_embed(str(url))

pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))

merge the dataframe reduced by alias

In [64]:
#add a tag to know where they are from
marvel_alias['Comic'] = 'Marvel'
dc_alias['Comic'] = 'DC'

#attribut we want to save
attribute = ['URL', 'Real Name', 'Current Alias', 'Comic', 'Identity', 'Citizenship', 'Marital Status',\
            'Occupation', 'Education', 'Gender', 'Height in float', 'Weight in float', 'Eyes', 'Hair',\
            'Place of Birth','Behavior',\
            'Number_of_apparitions', 'years', 'First_apparition', 'Longevity', 'Score appearance',\
            'Score longevity', 'Score Famous']


pers_alias = pd.concat([marvel_alias[attribute],dc_alias[attribute]],axis=0)
pers_alias.rename(inplace=True, columns={'Height in float':'Height', 'Weight in float':'Weight',\
                                   'Number_of_apparitions':'Nb appearance','years':'Years',\
                                   'First_apparition':'First appearance', })
In [65]:
def add_epoch(first):
    if first<1960.:
        return 1
    elif (first>=1960.) & (first <1980.):
        return 2
    elif (first>=1980.) & (first <2000.):
        return 3
    else:
        return 4
    
pers_alias['Epoch'] = pers_alias['First appearance'].apply(add_epoch)
#code used to find character pers_alias[(pers_alias['Comic']=='Marvel')&(pers_alias['Epoch']==4)].sort_values(["Score Famous"], ascending = False).head(20)[['First appearance','Current Alias','Comic']]

Graph of separation between famous and not famous

In [66]:
#Plot it with ploty
fig = go.Figure()

pers_score_marvel = pd.DataFrame(pers_alias[pers_alias['Comic']=='Marvel']['Score Famous'].value_counts(bins=101)).reset_index()
pers_score_marvel['mid'] = [(x.left+ x.right)/2 for x in pers_score_marvel['index']]
pers_high_marvel = len(pers_alias[(pers_alias['Comic']=='Marvel')&(pers_alias['Score Famous']>66)])
pers_med_marvel = len(pers_alias[(pers_alias['Score Famous']<=66)&(pers_alias['Score Famous']>33)&(pers_alias['Comic']=='Marvel')])
pers_low_marvel = len(pers_alias[(pers_alias['Score Famous']<=33)&(pers_alias['Comic']=='Marvel')])
total_marvel = len(pers_alias[pers_alias['Comic']=='Marvel']['Score Famous'])

pers_score_dc = pd.DataFrame(pers_alias[pers_alias['Comic']=='DC']['Score Famous'].value_counts(bins=101)).reset_index()
pers_score_dc['mid'] = [(x.left+ x.right)/2 for x in pers_score_dc['index']]
pers_high_dc = len(pers_alias[(pers_alias['Comic']=='DC')&(pers_alias['Score Famous']>66)])
pers_med_dc = len(pers_alias[(pers_alias['Score Famous']<=66)&(pers_alias['Score Famous']>33)&(pers_alias['Comic']=='DC')])
pers_low_dc = len(pers_alias[(pers_alias['Score Famous']<=33)&(pers_alias['Comic']=='DC')])
total_dc = len(pers_alias[pers_alias['Comic']=='DC']['Score Famous'])

pers_score = pd.DataFrame(pers_alias['Score Famous'].value_counts(bins=101)).reset_index()
pers_score['mid'] = [(x.left+ x.right)/2 for x in pers_score['index']]
pers_high = pers_high_marvel + pers_high_dc
pers_med = pers_med_marvel + pers_med_dc
pers_low = pers_low_marvel + pers_low_dc
total = total_marvel + total_dc

#'''
fig.add_bar(name='Marvel', x=pers_score_marvel['mid'], y=pers_score_marvel['Score Famous'], width=0.98,
                visible=False, marker=dict(color="#990000"),
           hovertemplate = '<b>Score: %{x}</b> <br><br><b>Marvel:</b> %{y} <extra></extra>')
fig.add_bar(name='DC Comics', x=pers_score_dc['mid'], y=pers_score_dc['Score Famous'], width=0.98,
                visible=False, marker=dict(color="#0F4C81"),
               hovertemplate = '<b>Score: %{x}</b> <br><br><b>DC Comics:</b> %{y} <extra></extra>') 
fig.add_bar(name='Marvel + DC Comics', x=pers_score['mid'], y=pers_score['Score Famous'], width=0.98,
                visible=True, marker=dict(color="purple"),
            hovertemplate = '<b>Score: %{x}</b> <br><br><b>Marvel + DC Comics:</b> %{y} <extra></extra>')

'''#this part is too large for plotly, but i dont know why
#hovertemplate = '<b>Score: %{x}</b> <br><br><b>Marvel:</b> %{y} <br> DC: %{meta[0]} <br> <i>Total: %{meta[1]}</i><extra></extra>', meta = [pers_score_dc,pers_score],
fig.add_trace(go.Histogram(x=pers[pers['Comic']=='Marvel']['Score Famous'], nbinsx=1, name='Marvel', visible=False, 
                          marker=dict(color="#990000")
                          ))
#hovertemplate = '<b>Score: %{x}</b> <br><br><b>DC:</b> %{y} <br> Marvel: %{meta[0]} <br> <i>Total: %{meta[1]}</i><extra></extra>', meta = [pers_score_dc,pers_score],
fig.add_trace(go.Histogram(x=pers[pers['Comic']=='DC']['Score Famous'], nbinsx=1, name='DC Comics', visible=False,
                          marker=dict(color="#0F4C81")
                          ))
fig.add_trace(go.Histogram(x=pers['Score Famous'], nbinsx=1, name='Marvel + DC Comics', visible=True,
                          marker=dict(color="purple")
                          ))
'''
# Add text labels
text_marvel = [go.layout.Annotation(x=16, y=0.98, xref="x", yref="paper", yanchor= 'top', xanchor='center',
                            text="<b> Forgotten </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_low_marvel,pers_low_marvel/total_marvel*100),
                            font=dict(family="Komika Hand",size=10,color="#990000"), showarrow=False,
                            arrowhead=7, ax=0, ay=0),
                go.layout.Annotation(x=50, y=0.98, xref="x", yref="paper", yanchor= 'top',xanchor='center',
                            text="<b> Intermediate </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_med_marvel,pers_med_marvel/total_marvel*100),
                            font=dict(family="Komika Hand",size=10,color="#990000"), showarrow=False,
                            arrowhead=7, ax=0, ay=0),
                go.layout.Annotation(x=84, y=0.98, xref="x", yref="paper", yanchor= 'top',xanchor='center',
                            text="<b> Famous </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_high_marvel,pers_high_marvel/total_marvel*100),
                            font=dict(family="Komika Hand",size=10,color="#990000"), showarrow=False,
                            arrowhead=7, ax=0, ay=0)]

text_total = [go.layout.Annotation(x=16, y=0.98, xref="x", yref="paper", yanchor= 'top', xanchor='center',
                            text="<b> Forgotten </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_low,pers_low/total*100),
                            font=dict(family="Komika Hand",size=10,color="purple"), showarrow=False,
                            arrowhead=7, ax=0, ay=0),
                go.layout.Annotation(x=50, y=0.98, xref="x", yref="paper", yanchor= 'top',xanchor='center',
                            text="<b> Intermediate </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_med,pers_med/total*100),
                            font=dict(family="Komika Hand",size=10,color="purple"), showarrow=False,
                            arrowhead=7, ax=0, ay=0),
                go.layout.Annotation(x=84, y=0.98, xref="x", yref="paper", yanchor= 'top',xanchor='center',
                            text="<b> Famous </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_high,pers_high/total*100),
                            font=dict(family="Komika Hand",size=10,color="purple"), showarrow=False,
                            arrowhead=7, ax=0, ay=0)]
text_dc = [go.layout.Annotation(x=16, y=0.98, xref="x", yref="paper", yanchor= 'top', xanchor='center',
                            text="<b> Forgotten </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_low_dc,pers_low_dc/total_dc*100),
                            font=dict(family="Komika Hand",size=10,color="#0F4C81"), showarrow=False,
                            arrowhead=7, ax=0, ay=0),
                go.layout.Annotation(x=50, y=0.98, xref="x", yref="paper", yanchor= 'top',xanchor='center',
                            text="<b> Intermediate </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_med_dc,pers_med_dc/total_dc*100),
                            font=dict(family="Komika Hand",size=10,color="#0F4C81"), showarrow=False,
                            arrowhead=7, ax=0, ay=0),
                go.layout.Annotation(x=84, y=0.98, xref="x", yref="paper", yanchor= 'top',xanchor='center',
                            text="<b> Famous </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(pers_high_dc,pers_high_dc/total_dc*100),
                            font=dict(family="Komika Hand",size=10,color="#0F4C81"), showarrow=False,
                            arrowhead=7, ax=0, ay=0)]


#create buttons
buttons=[dict(label='Both Comics',
                            method="update",
                            args=[{"visible": [False,False,True]},
                                {"annotations": text_total}]),
               dict(label='Marvel',
                            method="update",
                            args=[{"visible": [True, False, False]},
                                {"annotations": text_marvel}]),
               dict(label='DC Comics',
                            method="update",
                            args=[{"visible": [False, True, False]},
                                {"annotations": text_dc}])
              ]

fig.update_layout(
    bargap=0.1,
    title=dict(text="Repartition of the character in groups of celebrity", 
                             font= {'family':'Komika Hand', 
                                      'color':'#7f7f7f', 
                                      'size':20}),
    xaxis_title="Famousness Score",
    yaxis_title="Number",
    yaxis_type="log",
    font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f")
)

# apply filters
fig.update_layout(
    updatemenus=[
        #category button
        go.layout.Updatemenu(
            type = "buttons",
            direction= 'down',
            borderwidth = 0.5,
            active=0,
            pad={"r": 10, "t": 10, "b":20},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list(buttons)
        )
    ])


# Add shape regions
fig.update_layout(
    shapes=[
        # 1st highlight 0 to 33
        go.layout.Shape(
            type="rect",
            # x-reference is assigned to the x-values
            xref="x",
            # y-reference is assigned to the plot paper [0,1]
            yref="paper",
            x0=0,
            y0=0,
            x1=33,
            y1=1,
            fillcolor="black",
            opacity=0.3,
            layer="above",
            line_width=0,
        ),
        # 2nd highlight 33 to 66
        go.layout.Shape(
            type="rect",
            xref="x",
            yref="paper",
            x0=33,
            y0=0,
            x1=66,
            y1=1,
            fillcolor="grey",
            opacity=0.3,
            layer="above",
            line_width=0,
        ),
        # 3rd highlight 66 to 100
        go.layout.Shape(
            type="rect",
            xref="x",
            yref="paper",
            x0=66,
            y0=0,
            x1=100,
            y1=1,
            fillcolor="white",
            opacity=0.3,
            layer="above",
            line_width=0,
        )
    ]
)

                                  
# Add text labels
fig.update_layout(annotations=text_total)        


fig.show()

#This part save the plot
chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
url = py.plot(fig, filename='repartition_famousness.html', auto_open=False,)
#print(url)
tls.get_embed(str(url))
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))

Histogram of some famous character

In [67]:
########### SI LE TEMPS PERMET; CHANGE OCCUPATION ET EDUCATION
n_charac = 13
#things to display
name = {0: 'Superman',
        1: 'Captain America',
        2: 'Batman',
        3: 'Spider-Man',
        4: 'Iron Man',
        5: 'Green Lantern',
        6: 'Human Torch',
        7: 'Thor',
        8: 'The Flash',
        9: 'Mister Fantastic',
        10: 'Hulk',
        11: 'Wolverine',
        12: 'Wonder Woman'
       }
alias = {0: 'Superman',
        1: 'Captain America',
        2: 'Batman',
        3: 'Spider-Man',
        4: 'Iron Man',
        5: 'Green Lantern',
        6: 'Human Torch',
        7: 'All-Father Thor',
        8: 'The Flash',
        9: 'Mister Fantastic',
        10: 'Hulk',
        11: 'Wolverine',
        12: 'Wonder Woman'
        }
info = {0: ['Alias', 'Current Alias'],
          1: ['Real name', 'Real Name'],
          2: ['First appearance', 'First appearance'],
          3: ['Gender','Gender'],
          4: ['Citizenship','Citizenship'],
          5: ['Place of Birth','Place of Birth'],
          6: ['Marital Status', 'Marital Status'],
          7: ['Height','Height'],
        8: ['Weight','Weight'],
        9: ['Color of Eyes','Eyes'],
        10: ['Color of Hair','Hair'],
        11: ['Score Appearance','Score appearance'],
        12: ['Score Longevity','Score longevity'],
        13: ['Score Famousness','Score Famous']
       }

fig = go.Figure()
characters = dict([])
info_charac=dict([])
buttons=[]
visible = []
#datas...
for i in range(n_charac):
    character = pers_alias[pers_alias['Current Alias']==alias[i]].sort_values('Score Famous',ascending=False).iloc[0]
    text = ''
    for j in range(14):
        if j in [13,11,12]:
            text = text + "<b>{}</b>: {:.1f} <br>".format(info[j][0],character[info[j][1]])
        elif j == 2:
            text = text + "<b>{}</b>: {:.0f} <br>".format(info[j][0],character[info[j][1]])
        elif j in [7]:
            text = text + "<b>{}</b>: {:.1f} cm <br>".format(info[j][0],character[info[j][1]])
        elif j in [8]:
            text = text + "<b>{}</b>: {:.1f} kg <br>".format(info[j][0],character[info[j][1]])
        else:
            text = text + "<b>{}</b>: {}<br>".format(info[j][0],character[info[j][1]])
    line = pd.Series(character['Years']).value_counts()
    
    visible = [False]*n_charac
    visible[i] = True
    if character['Comic']=='Marvel':
        info_charac = [go.layout.Annotation(x=0.015, y=0.92, xref="paper", yref="paper", font=dict(family="Arial",size=10,color="white"),
                                  text= text, yanchor='top', xanchor='left' , width=200, height=200, 
                                 bgcolor = '#990000', bordercolor='white', borderwidth=2
                                  )]
        fig.add_bar(name=name[i], x=line.index, y=line,
                hovertemplate = '<b>%{meta}</b> <br><br>Nb appearance in %{x}: %{y}<extra></extra>', meta=name[i],
                visible=False, marker=dict(color="#990000"))
        buttons.append(dict(label=alias[i],
                            method="update",
                            args=[{"visible": visible},
                                {"annotations": info_charac}]))
        if i==0:
            fig.update_layout(annotations=info_charac)
    else:
        info_charac = [go.layout.Annotation(x=0.015, y=0.92, xref="paper", yref="paper", font=dict(family="Arial",size=10,color="white"),
                                  text= text, yanchor='top', xanchor='left' , width=200, height=200, 
                                 bgcolor = '#0F4C81', bordercolor='white', borderwidth=2
                                  )]
        fig.add_bar(name=name[i], x=line.index, y=line,
                hovertemplate = '<b>%{meta}</b> <br><br>Nb appearance in %{x}: %{y}<extra></extra>', meta=name[i],
                visible=False, marker=dict(color="#0F4C81"))
        buttons.append(dict(label=alias[i],
                            method="update",
                            args=[{"visible": visible},
                                {"annotations": info_charac}]))
        if i==0:
            fig.update_layout(annotations=info_charac)
    
    
#Title, labels...
fig.update_layout(
    bargap=0,
    title=dict(text="Character:", 
                font= {'family':'Komika Hand', 
                        'color':'#7f7f7f', 
                        'size':20}),
    xaxis_title="Year",
    yaxis_title="Nb of apparition",
    font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f")
)

# apply filters
fig.update_layout(
    updatemenus=[
        #category button
        go.layout.Updatemenu(
            type = "buttons",
            direction= 'down',
            borderwidth = 0.5,
            active=0,
            pad={"r": 10, "t": 10, "b":20},
            x=-0.32,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list(buttons)
        )
    ])

# Make the first trace visible
fig.data[0].visible = True
fig.update_xaxes(range=[1930, 2020])
fig.update_yaxes(range=[0, 500])
fig.show()
#This part save the plot
#chart_studio.tools.set_credentials_file(username='Ahko26', api_key='VBmVVv93RzXI5FVJdRj8')
#url = py.plot(fig, filename='Hist_character.html', auto_open=False,)
#print(url)

#tls.get_embed(str(url))
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))

Prepare the data for the analysis of Longevity, made by Pilou

In [68]:
pers_alias['Famous']=pers_alias['Score Famous'].apply(lambda x : 'Famous' if x > 66 else ('Intermediate' if x > 3 else 'Forgotten')) 
marvel_df = pers_alias[pers_alias['Comic']=='Marvel']
dc_df = pers_alias[pers_alias['Comic']=='DC']
In [69]:
pickle.dump(marvel_df, open('data_pickle/marvel_longevity','wb'))
pickle.dump(dc_df, open('data_pickle/dc_longevity','wb'))
In [ ]: